Load the required libraries.
library(tidyverse)
library(sf)
library(here)
library(readxl)
library(scales)
library(DT)
library(brms)
library(tidybayes)
library(patchwork)
library(marginaleffects)
library(ggrepel)
library(scico)
library(ggdensity)
library(ggpubr)
library(ggsn)
Functions that we will use throughout the script
#labeller for years
year_labels <- c(1950:1963)
#The Glasgow mass minuture chest X-ray campaign happened between 11th March and 12th April 1957
#Segment for graphs to match ACF period
acf_start <- decimal_date(ymd("1957-03-11"))
acf_end <- decimal_date(ymd("1957-04-12"))
Function for counterfactual plots
plot_counterfactual <- function(model_data, model, population_denominator, outcome, grouping_var=NULL, ...){
#labeller for years
year_labels <- c(1950:1963)
#The Glasgow mass minuture chest X-ray campaign happened between 11th March and 12th April 1957
#Segment for graphs to match ACF period
acf_start <- decimal_date(ymd("1957-03-11"))
acf_end <- decimal_date(ymd("1957-04-12"))
summary <- {{model_data}} %>%
select(year, year2, y_num, acf_period, {{population_denominator}}, {{outcome}}, {{grouping_var}}) %>%
add_epred_draws({{model}}) %>%
group_by(year2, acf_period, {{grouping_var}}) %>%
median_qi() %>%
mutate(.epred_inc = .epred/{{population_denominator}}*100000,
.epred_inc.lower = .epred.lower/{{population_denominator}}*100000,
.epred_inc.upper = .epred.upper/{{population_denominator}}*100000) %>%
mutate(acf_period = case_when(acf_period=="a. pre-acf" ~ "Before Intervention",
acf_period=="c. post-acf" ~ "Post Intervention"))
#create the counterfactual (no intervention), and summarise
counterfact <-
add_epred_draws(object = {{model}},
newdata = {{model_data}} %>%
select(year, year2, y_num, {{population_denominator}}, {{grouping_var}}, {{outcome}}) %>%
mutate(acf_period = "a. pre-acf")) %>%
group_by(year2, acf_period, {{grouping_var}}) %>%
median_qi() %>%
mutate(.epred_inc = .epred/{{population_denominator}}*100000,
.epred_inc.lower = .epred.lower/{{population_denominator}}*100000,
.epred_inc.upper = .epred.upper/{{population_denominator}}*100000) %>%
mutate(acf_period = case_when(acf_period=="a. pre-acf" ~ "Before Intervention",
acf_period=="c. post-acf" ~ "Post Intervention"))
#plot the intervention effect
p <- summary %>%
droplevels() %>%
ggplot() +
geom_ribbon(aes(ymin=.epred_inc.lower, ymax=.epred_inc.upper, x=year2, group = acf_period, fill=acf_period), alpha=0.5) +
geom_ribbon(data = counterfact %>% filter(year>=1956),
aes(ymin=.epred_inc.lower, ymax=.epred_inc.upper, x=year2, fill="Counterfactual"), alpha=0.5) +
geom_line(data = counterfact %>% filter(year>=1956),
aes(y=.epred_inc, x=year2, colour="Counterfactual")) +
geom_line(aes(y=.epred_inc, x=year2, group=acf_period, colour=acf_period)) +
geom_point(data = {{model_data}}, aes(y={{outcome}}, x=year2, shape=acf_period), size=2) +
geom_vline(aes(xintercept=acf_end), linetype=3) +
theme_ggdist() +
scale_y_continuous(labels=comma) +
scale_x_continuous(labels = year_labels,
breaks = year_labels) +
scale_fill_manual(values = c("#DE0D92", "grey50", "#4D6CFA") , name="") +
scale_colour_manual(values = c("#DE0D92", "grey50", "#4D6CFA") , name="") +
scale_shape_discrete(name="") +
labs(
x = "Year",
y = "Case notification rate (per 100,000)",
caption = "Mass miniature X-ray campaign period between dashed lines (11th March-12th April 1957)"
) +
theme(legend.position = "bottom",
panel.border = element_rect(colour = "grey78", fill=NA),
title = element_text(size=14),
axis.text.x = element_text(size=10, angle = 90, hjust=1, vjust=0.5),
legend.text = element_text(size=12)) +
guides(shape="none")
facet_vars <- vars(...)
if (length(facet_vars) != 0) {
p <- p + facet_wrap(facet_vars)
}
p
}
Function for calculating measures of change over time
summarise_change <- function(model_data, model, population_denominator, grouping_var=NULL){
#a. immediate change
nd_immediate <- {{model_data}} %>%
filter(year %in% c(1956:1957)) %>%
select(acf_period, year, y_num, {{population_denominator}}, {{grouping_var}})
#Calcuate incidence per draw, then summarise.
immediate_change <- add_epred_draws({{model}},
newdata=nd_immediate) %>%
mutate(epred_inc100k = .epred/{{population_denominator}}) %>%
group_by(.draw, {{grouping_var}}) %>%
mutate(acf_inc100k_diff = last(epred_inc100k)-first(epred_inc100k),
acf_inc100k_rr = last(epred_inc100k)/first(epred_inc100k)) %>%
ungroup() %>%
group_by({{grouping_var}}) %>%
mean_qi(acf_inc100k_diff, acf_inc100k_rr) %>%
mutate(change = "Immediate change") %>%
ungroup()
#b. post-ACF change
nd_post <- {{model_data}} %>%
filter(year %in% c(1956,1958)) %>%
select(acf_period, year, y_num, {{population_denominator}}, {{grouping_var}})
#Calcuate incidence per draw, then summarise.
post_change <- add_epred_draws({{model}},
newdata=nd_post) %>%
mutate(epred_inc100k = .epred/{{population_denominator}}) %>%
group_by(.draw, {{grouping_var}}) %>%
mutate(acf_inc100k_diff = last(epred_inc100k)-first(epred_inc100k),
acf_inc100k_rr = last(epred_inc100k)/first(epred_inc100k)) %>%
ungroup() %>%
group_by({{grouping_var}}) %>%
mean_qi(acf_inc100k_diff, acf_inc100k_rr) %>%
mutate(change = "Post-ACF change") %>%
ungroup()
#c. change in slope post vs. pre-ACF
slope_change <- {{model_data}} %>%
select(year, year2, y_num, acf_period, {{population_denominator}}, {{grouping_var}}) %>%
filter(year!=1957) %>%
add_epred_draws({{model}}) %>%
mutate(inc_100k = .epred/{{population_denominator}}*100000) %>%
group_by(year, {{grouping_var}}, acf_period, ) %>%
mean_qi(inc_100k) %>%
ungroup() %>%
mutate(n_years = length(year), .by=c(acf_period, {{grouping_var}})) %>%
summarise(pct_change_epred_overall = (((last(inc_100k) - first(inc_100k))/first(inc_100k))),
pct_change_lower_overall = (((last(.lower) - first(.lower))/first(.lower))),
pct_change_upper_overall = (((last(.upper) - first(.upper))/first(.upper))),
pct_change_epred_annual = (((last(inc_100k) - first(inc_100k))/first(inc_100k))/n_years),
pct_change_lower_annual = (((last(.lower) - first(.lower))/first(.lower))/n_years),
pct_change_upper_annual = (((last(.upper) - first(.upper))/first(.upper))/n_years),
.by = c(acf_period, {{grouping_var}})) %>%
distinct() %>%
mutate(change = "Slope change")
lst(immediate_change, post_change, slope_change)
}
Function for calculating difference from counterfactual
calcuate_counterfactual <- function(model_data, model, population_denominator, grouping_var=NULL){
#effect vs. counterfactual
counterfact <-
add_epred_draws(object = {{model}},
newdata = {{model_data}} %>%
select(year, year2, y_num, {{population_denominator}}, {{grouping_var}}) %>%
mutate(acf_period = "a. pre-acf")) %>%
group_by(.draw, year, {{grouping_var}}, acf_period) %>%
mutate(.epred_inc_counterf = .epred/{{population_denominator}}*100000, .epred_counterf=.epred) %>%
filter(year>1957) %>%
ungroup() %>%
select(year, {{population_denominator}}, .draw, .epred_counterf, .epred_inc_counterf)
#Calcuate incidence per draw, then summarise.
post_change <-
add_epred_draws(object = {{model}},
newdata = {{model_data}} %>%
select(year, year2, y_num, {{population_denominator}}, {{grouping_var}}, acf_period)) %>%
group_by(.draw, year, {{grouping_var}}, acf_period) %>%
mutate(.epred_inc = .epred/{{population_denominator}}*100000) %>%
filter(year>1957) %>%
ungroup() %>%
select(year, {{population_denominator}}, {{grouping_var}}, .draw, .epred, .epred_inc)
#for the overall period
counterfact_overall <-
add_epred_draws(object = {{model}},
newdata = {{model_data}} %>%
select(year, year2, y_num, {{population_denominator}}, {{grouping_var}}) %>%
mutate(acf_period = "a. pre-acf")) %>%
group_by(.draw, {{grouping_var}}) %>%
filter(year>1957) %>%
ungroup() %>%
select({{population_denominator}}, .draw, .epred) %>%
group_by(.draw) %>%
summarise(.epred_counterf = sum(.epred)) %>%
mutate(year = "Overall (1958-1963)")
#Calcuate incidence per draw, then summarise.
post_change_overall <-
add_epred_draws(object = {{model}},
newdata = {{model_data}} %>%
select(year, year2, y_num, {{population_denominator}}, {{grouping_var}}, acf_period)) %>%
group_by(.draw, {{grouping_var}}) %>%
filter(year>1957) %>%
ungroup() %>%
select({{population_denominator}}, {{grouping_var}}, .draw, .epred) %>%
group_by(.draw, {{grouping_var}}) %>%
summarise(.epred = sum(.epred))
counter_post <-
left_join(counterfact, post_change) %>%
mutate(cases_averted = .epred_counterf-.epred,
pct_change = (.epred - .epred_counterf)/.epred_counterf,
diff_inc100k = .epred_inc - .epred_inc_counterf,
rr_inc100k = .epred_inc/.epred_inc_counterf) %>%
group_by(year, {{grouping_var}}) %>%
mean_qi(cases_averted, pct_change, diff_inc100k, rr_inc100k) %>%
ungroup()
counter_post_overall <-
left_join(counterfact_overall, post_change_overall) %>%
mutate(cases_averted = .epred_counterf-.epred,
pct_change = (.epred - .epred_counterf)/.epred_counterf) %>%
group_by({{grouping_var}}) %>%
mean_qi(cases_averted, pct_change) %>%
ungroup() %>%
mutate(year = "Overall (1958-1963)")
lst(counter_post, counter_post_overall)
}
Function for tidying up counterfactuals (mostly for making nice tables)
tidy_counterfactuals <- function(data){
data %>%
mutate(across(c(cases_averted:cases_averted.upper, diff_inc100k:diff_inc100k.upper), number_format(accuracy = 0.1, big.mark = ","))) %>%
mutate(across(c(rr_inc100k:rr_inc100k.upper), number_format(accuracy = 0.01))) %>%
mutate(across(c(pct_change:pct_change.upper), percent, accuracy=0.1)) %>%
mutate(year = as.character(year),
cases_averted = glue::glue("{cases_averted} ({cases_averted.lower} to {cases_averted.upper})"),
pct_change = glue::glue("{pct_change} ({pct_change.lower} to {pct_change.upper})"),
diff_inc = glue::glue("{diff_inc100k} ({diff_inc100k.lower} to {diff_inc100k.upper})"),
rr_inc = glue::glue("{rr_inc100k} ({rr_inc100k.lower} to {rr_inc100k.upper})"))
}
tidy_counterfactuals_overall <- function(data){
data %>%
mutate(across(c(cases_averted:cases_averted.upper), number_format(accuracy = 0.1, big.mark = ","))) %>%
mutate(across(c(pct_change:pct_change.upper), percent, accuracy=0.1)) %>%
mutate(year = as.character(year),
cases_averted = glue::glue("{cases_averted} ({cases_averted.lower} to {cases_averted.upper})"),
pct_change = glue::glue("{pct_change} ({pct_change.lower} to {pct_change.upper})"))
}
Import datasets for analysis
Import data from Jonathan Golub’s paper (https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4472641/), and summarise in a figure
golub <- read_xlsx("2024_01_10_golub.xlsx")
golub_cxr <- golub %>%
filter(!is.na(mass_cxr)) %>%
separate(year_country, into = c("year", "country")) %>%
mutate(year = as.numeric(year)) %>%
filter(year<1980) %>%
mutate(target_population = str_replace_all(sample, " ", ""),
target_population = str_extract(target_population, "\\d+"))
Warning: Expected 2 pieces. Additional pieces discarded in 3 rows [6, 12, 17].
Make a map of Glasgow wards
glasgow_wards_1951 <- st_read(here("mapping/glasgow_wards_1951.geojson"))
Reading layer `glasgow_wards_1951' from data source `/Users/petermacpherson/Dropbox/Projects/Historical TB ACF 2023-11-28/Work/analysis/glasgow-cxr/mapping/glasgow_wards_1951.geojson' using driver `GeoJSON'
Simple feature collection with 37 features and 3 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -4.393502 ymin: 55.77464 xmax: -4.070411 ymax: 55.92814
Geodetic CRS: WGS 84
#read in Scotland boundary
scotland <- st_read(here("mapping/Scotland_boundary/Scotland boundary.shp"))
Reading layer `Scotland boundary' from data source
`/Users/petermacpherson/Dropbox/Projects/Historical TB ACF 2023-11-28/Work/analysis/glasgow-cxr/mapping/Scotland_boundary/Scotland boundary.shp' using driver `ESRI Shapefile'
Simple feature collection with 1 feature and 1 field
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: 5513 ymin: 530249 xmax: 470332 ymax: 1220302
Projected CRS: OSGB36 / British National Grid
#make a bounding box for Glasgow
bbox <- st_bbox(glasgow_wards_1951) |> st_as_sfc()
#plot scotlan with a bounding box around the City of Glasgow
scotland_with_bbox <- ggplot() +
geom_sf(data = scotland, fill="antiquewhite") +
geom_sf(data = bbox, colour = "#C60C30", fill="antiquewhite") +
theme_void() +
theme(panel.border = element_rect(colour = "grey78", fill=NA, linewidth = 0.5),
panel.background = element_rect(fill = "#EAF7FA", size = 0.3))
Warning: The `size` argument of `element_rect()` is deprecated as of ggplot2 3.4.0.
Please use the `linewidth` argument instead.
#plot the wards
#note we tidy up some names to fit on map
glasgow_ward_map <- glasgow_wards_1951 %>%
mutate(ward = case_when(ward=="Shettleston and Tollcross" ~ "Shettleston and\nTollcross",
ward=="Partick (West)" ~ "Partick\n(West)",
ward=="Partick (East)" ~ "Partick\n(East)",
ward=="North Kelvin" ~ "North\nKelvin",
ward=="Kinning Park" ~ "Kinning\nPark",
TRUE ~ ward)) %>%
ggplot() +
geom_sf(aes(fill=division)) +
geom_sf_label(aes(label = ward), size=3, fill=NA, label.size = NA, colour="black", family = "Segoe UI") +
#scale_colour_identity() +
scale_fill_brewer(palette = "Set3", name="City of Glasgow Division") +
theme_grey(base_family = "Segoe UI") +
labs(x="",
y="",
fill="Division") +
theme(legend.position = "top",
panel.border = element_rect(colour = "grey78", fill=NA, linewidth = 0.5),
panel.background = element_rect(fill = "antiquewhite", size = 0.3),
panel.grid.major = element_line(color = "grey78")) +
guides(fill=guide_legend(title.position = "top", title.hjust = 0.5, title.theme = element_text(face="bold"))) +
scalebar(glasgow_wards_1951, dist = 2, dist_unit = "km",
transform = TRUE, model = "WGS84", location="bottomleft")
#add the map of scotland as an inset
glasgow_ward_map + inset_element(scotland_with_bbox, 0.75, 0, 1, 0.4)
ggsave(here("figures/s1.png"), height=10, width = 12)
NA
NA
Load in the datasets for denonomiators, and check for consistency.
overall_pops <- read_xlsx(path = "2023-11-28_glasgow-acf.xlsx", sheet = "overall_population")
overall_pops %>%
mutate(across(where(is.numeric) & !(year), ~comma(.))) %>%
datatable()
#shift year to midpoint
overall_pops <- overall_pops %>%
mutate(year2 = year+0.5)
Note, we have three population estimates:
(Population in shipping is estimated from the 1951 census, so is the same for most years)
First, plot the total population
overall_pops %>%
ggplot() +
geom_area(aes(y=total_population, x=year2), alpha=0.5, colour = "mediumseagreen", fill="mediumseagreen") +
geom_point(aes(y=total_population, x=year2), colour = "mediumseagreen") +
geom_vline(aes(xintercept=acf_start), linetype=3) +
geom_vline(aes(xintercept=acf_end), linetype=3) +
scale_y_continuous(labels=comma) +
scale_x_continuous(labels = year_labels,
breaks = year_labels) +
labs(
title = "Glasgow Corporation: total population",
subtitle = "1950 to 1963",
x = "Year",
y = "Population",
caption = "Mid-year estimates\nMass miniature X-ray campaign period between dashed lines (11th March-12th April 1957)"
) +
theme_ggdist()
NA
NA
Now the population excluding institutionalised and shipping population
overall_pops %>%
ggplot() +
geom_area(aes(y=population_without_inst_ship, x=year2), alpha=0.5, colour = "purple", fill="purple") +
geom_point(aes(y=population_without_inst_ship, x=year2), colour = "purple") +
geom_vline(aes(xintercept=acf_start), linetype=3) +
geom_vline(aes(xintercept=acf_end), linetype=3) +
scale_y_continuous(labels=comma) +
scale_x_continuous(labels = year_labels,
breaks = year_labels) +
labs(
title = "Glasgow Corporation: population excluding institutionalised and shipping",
subtitle = "1950 to 1963",
x = "Year",
y = "Population",
caption = "Mid-year estimates\nMass miniature X-ray campaign period between dashed lines (11th March-12th April 1957)"
) +
theme_ggdist()
NA
NA
There are 5 Divisions containing 37 Wards in the Glasgow Corporation, with consistent boundaries over time.
#look-up table for divisions and wards
ward_lookup <- read_xlsx(path = "2023-11-28_glasgow-acf.xlsx", sheet = "divisions_wards")
ward_pops <- read_xlsx(path = "2023-11-28_glasgow-acf.xlsx", sheet = "ward_population")
ward_pops %>%
mutate(across(where(is.numeric) & !(year), ~comma(.))) %>%
datatable()
#shift year to midpoint
ward_pops <- ward_pops %>%
mutate(year2 = year+0.5)
#Get the Division population
division_pops <- ward_pops %>%
group_by(division, year) %>%
summarise(population_without_inst_ship = sum(population_without_inst_ship, na.rm = TRUE),
institutions = sum(institutions, na.rm = TRUE),
shipping = sum(shipping, na.rm = TRUE),
total_population = sum(total_population, na.rm = TRUE))
`summarise()` has grouped output by 'division'. You can override using the `.groups` argument.
division_pops %>%
mutate(across(where(is.numeric) & !(year), ~comma(.))) %>%
datatable()
NA
Plot the overall population by Division and Ward
division_pops %>%
mutate(year2 = year+0.5) %>%
ggplot() +
geom_area(aes(y=total_population, x=year2, colour=division, fill=division), alpha=0.8) +
geom_point(aes(y=total_population, x=year2, colour=division)) +
geom_vline(aes(xintercept=acf_start), linetype=3) +
geom_vline(aes(xintercept=acf_end), linetype=3) +
scale_y_continuous(labels=comma) +
facet_wrap(division~.) +
scale_x_continuous(labels = year_labels,
breaks = year_labels,
guide = guide_axis(angle = 90)) +
scale_fill_brewer(palette = "Set3", name = "") +
scale_colour_brewer(palette = "Set3", name = "") +
labs(
title = "Glasgow Corporation: total population by Division",
subtitle = "1950 to 1963",
x = "Year",
y = "Population",
caption = "Mid-year estimates\nMass miniature X-ray campaign period between dashed lines (11th March-12th April 1957)"
) +
theme_ggdist() +
theme(legend.position = "bottom")
NA
NA
ward_pops %>%
ggplot() +
geom_area(aes(y=total_population, x=year2, colour=division, fill=division), alpha=0.8) +
geom_point(aes(y=total_population, x=year2, colour=division)) +
geom_vline(aes(xintercept=acf_start), linetype=3) +
geom_vline(aes(xintercept=acf_end), linetype=3) +
scale_y_continuous(labels=comma) +
facet_wrap(ward~., ncol=6) +
scale_x_continuous(labels = year_labels,
breaks = year_labels,
guide = guide_axis(angle = 90)) +
scale_fill_brewer(palette = "Set3", name="Division") +
scale_colour_brewer(palette = "Set3", name = "Division") +
labs(
title = "Glasgow City: total population by Ward",
subtitle = "1950 to 1963",
x = "Year",
y = "Population",
caption = "Mid-year estimates\nMass miniature X-ray campaign period between dashed lines (11th March-12th April 1957)"
) +
theme_ggdist() +
theme(legend.position = "bottom")
ggsave(here("figures/s2.png"), height=10, width=12)
Approximately, how many person-years of follow-up do we have?
overall_pops %>%
ungroup() %>%
summarise(across(year, length, .names = "years"),
across(c(population_without_inst_ship, total_population), sum)) %>%
mutate(across(where(is.double), comma)) %>%
datatable()
NA
NA
Change in population by ward
ward_pops %>%
group_by(ward) %>%
summarise(pct_change_pop = (last(population_without_inst_ship) - first(population_without_inst_ship))/first(population_without_inst_ship)) %>%
mutate(pct_change_pop = percent(pct_change_pop)) %>%
arrange(pct_change_pop) %>%
datatable()
NA
NA
NA
age_sex <- read_xlsx(path = "2023-11-28_glasgow-acf.xlsx", sheet = "age_sex_population") %>%
pivot_longer(cols = c(male, female),
names_to = "sex")
#collapse down to smaller age groups to be manageable
age_sex <- age_sex %>%
ungroup() %>%
mutate(age = case_when(age == "0 to 4" ~ "00 to 04",
age == "5 to 9" ~ "05 to 14",
age == "10 to 14" ~ "05 to 14",
age == "15 to 19" ~ "15 to 24",
age == "20 to 24" ~ "15 to 24",
age == "25 to 29" ~ "25 to 34",
age == "30 to 34" ~ "25 to 34",
age == "35 to 39" ~ "35 to 44",
age == "40 to 44" ~ "35 to 44",
age == "45 to 49" ~ "45 to 59",
age == "50 to 54" ~ "45 to 59",
age == "55 to 59" ~ "45 to 59",
TRUE ~ "60 & up")) %>%
group_by(year, age, sex) %>%
mutate(value = sum(value)) %>%
ungroup()
m_age_sex <- lm(value ~ splines::ns(year, knots = 3)*age*sex, data = age_sex)
summary(m_age_sex)
Warning: essentially perfect fit: summary may be unreliable
Call:
lm(formula = value ~ splines::ns(year, knots = 3) * age * sex,
data = age_sex)
Residuals:
Min 1Q Median 3Q Max
-1.185e-10 0.000e+00 0.000e+00 0.000e+00 1.185e-10
Coefficients: (14 not defined because of singularities)
Estimate Std. Error t value Pr(>|t|)
(Intercept) 5.222e+04 2.040e-10 2.559e+14 <2e-16 ***
splines::ns(year, knots = 3)1 -8.043e+03 4.071e-10 -1.976e+13 <2e-16 ***
splines::ns(year, knots = 3)2 NA NA NA NA
age05 to 14 3.669e+04 2.499e-10 1.468e+14 <2e-16 ***
age15 to 24 -3.893e+03 2.499e-10 -1.558e+13 <2e-16 ***
age25 to 34 -3.996e+04 2.499e-10 -1.599e+14 <2e-16 ***
age35 to 44 -4.230e+04 2.499e-10 -1.693e+14 <2e-16 ***
age45 to 59 5.459e+04 2.356e-10 2.317e+14 <2e-16 ***
age60 & up 7.533e+04 2.204e-10 3.418e+14 <2e-16 ***
sexmale 3.374e+03 2.886e-10 1.169e+13 <2e-16 ***
splines::ns(year, knots = 3)1:age05 to 14 -1.863e+03 4.985e-10 -3.737e+12 <2e-16 ***
splines::ns(year, knots = 3)2:age05 to 14 NA NA NA NA
splines::ns(year, knots = 3)1:age15 to 24 7.533e+04 4.985e-10 1.511e+14 <2e-16 ***
splines::ns(year, knots = 3)2:age15 to 24 NA NA NA NA
splines::ns(year, knots = 3)1:age25 to 34 1.325e+05 4.985e-10 2.658e+14 <2e-16 ***
splines::ns(year, knots = 3)2:age25 to 34 NA NA NA NA
splines::ns(year, knots = 3)1:age35 to 44 1.380e+05 4.985e-10 2.769e+14 <2e-16 ***
splines::ns(year, knots = 3)2:age35 to 44 NA NA NA NA
splines::ns(year, knots = 3)1:age45 to 59 3.474e+03 4.700e-10 7.390e+12 <2e-16 ***
splines::ns(year, knots = 3)2:age45 to 59 NA NA NA NA
splines::ns(year, knots = 3)1:age60 & up -8.453e+04 4.397e-10 -1.923e+14 <2e-16 ***
splines::ns(year, knots = 3)2:age60 & up NA NA NA NA
splines::ns(year, knots = 3)1:sexmale -1.994e+03 5.757e-10 -3.464e+12 <2e-16 ***
splines::ns(year, knots = 3)2:sexmale NA NA NA NA
age05 to 14:sexmale 1.053e+04 3.534e-10 2.980e+13 <2e-16 ***
age15 to 24:sexmale 2.352e+04 3.534e-10 6.656e+13 <2e-16 ***
age25 to 34:sexmale 1.355e+04 3.534e-10 3.833e+13 <2e-16 ***
age35 to 44:sexmale -1.727e+03 3.534e-10 -4.888e+12 <2e-16 ***
age45 to 59:sexmale 2.774e+03 3.332e-10 8.324e+12 <2e-16 ***
age60 & up:sexmale -7.761e+04 3.117e-10 -2.490e+14 <2e-16 ***
splines::ns(year, knots = 3)1:age05 to 14:sexmale -2.049e+04 7.051e-10 -2.906e+13 <2e-16 ***
splines::ns(year, knots = 3)2:age05 to 14:sexmale NA NA NA NA
splines::ns(year, knots = 3)1:age15 to 24:sexmale -6.780e+04 7.051e-10 -9.616e+13 <2e-16 ***
splines::ns(year, knots = 3)2:age15 to 24:sexmale NA NA NA NA
splines::ns(year, knots = 3)1:age25 to 34:sexmale -3.804e+04 7.051e-10 -5.396e+13 <2e-16 ***
splines::ns(year, knots = 3)2:age25 to 34:sexmale NA NA NA NA
splines::ns(year, knots = 3)1:age35 to 44:sexmale -1.171e+04 7.051e-10 -1.661e+13 <2e-16 ***
splines::ns(year, knots = 3)2:age35 to 44:sexmale NA NA NA NA
splines::ns(year, knots = 3)1:age45 to 59:sexmale -3.473e+04 6.647e-10 -5.224e+13 <2e-16 ***
splines::ns(year, knots = 3)2:age45 to 59:sexmale NA NA NA NA
splines::ns(year, knots = 3)1:age60 & up:sexmale 1.056e+05 6.218e-10 1.698e+14 <2e-16 ***
splines::ns(year, knots = 3)2:age60 & up:sexmale NA NA NA NA
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 3.074e-11 on 44 degrees of freedom
Multiple R-squared: 1, Adjusted R-squared: 1
F-statistic: 6.006e+29 on 27 and 44 DF, p-value: < 2.2e-16
age_levels <- age_sex %>% select(age) %>% distinct() %>% pull()
age_sex_nd <-
crossing(
age=age_levels,
sex=c("male", "female"),
year = 1950:1963
)
pred_pops <- age_sex_nd %>% modelr::add_predictions(m_age_sex)
Warning: prediction from a rank-deficient fit may be misleading
pred_pops %>%
ggplot(aes(x=year, y=pred, colour=age)) +
geom_line() +
geom_point() +
facet_grid(sex~.) +
scale_y_continuous(labels = comma, limits = c(0, 125000))
#How well do they match up with our overall populations?
pred_pops %>%
group_by(year) %>%
summarise(sum_pred_pop = sum(pred)) %>%
right_join(overall_pops) %>%
select(year, sum_pred_pop, population_without_inst_ship, total_population) %>%
pivot_longer(cols = c(sum_pred_pop, population_without_inst_ship, total_population)) %>%
ggplot(aes(x=year, y=value, colour=name)) +
geom_point() +
scale_y_continuous(labels = comma, limits = c(800000, 1250000))
Joining with `by = join_by(year)`
pred_pops %>%
group_by(year, sex) %>%
summarise(sum = sum(pred)) %>%
group_by(year) %>%
mutate(sex_ratio = first(sum)/last(sum))
`summarise()` has grouped output by 'year'. You can override using the `.groups` argument.
What percentage of adults (15+ participated in the intervention in 1957)?
pred_pops %>%
ungroup() %>%
filter(year==1957) %>%
filter(age != "00 to 04",
age != "05 to 14") %>%
summarise(total_pop = sum(pred)) %>%
mutate(cxr_screened = 714915) %>%
mutate(pct_pop_cxr_screened = percent(cxr_screened/total_pop))
pred_pops %>%
ungroup() %>%
filter(year==1957) %>%
filter(age != "00 to 04",
age != "05 to 14") %>%
summarise(total_pop = sum(pred), .by=sex) %>%
mutate(cxr_screened = c(340474, 281875)) %>%
mutate(pct_pop_cxr_screened = percent(cxr_screened/total_pop))
NA
NA
Population pyramids
label_abs <- function(x) {
comma(abs(x))
}
pred_pops %>%
ungroup() %>%
group_by(year) %>%
mutate(year_pop = sum(pred),
age_sex_pct = percent(pred/year_pop, accuracy=0.1)) %>%
mutate(sex = case_when(sex=="male" ~ "Male",
sex=="female" ~ "Female")) %>%
ggplot(
aes(x = age, fill = sex,
y = ifelse(test = sex == "Female",yes = -pred, no = pred))) +
geom_bar(stat = "identity") +
geom_text(aes(label = age_sex_pct),
position= position_stack(vjust=0.5), colour="white", size=2.5) +
facet_wrap(year~., ncol=7) +
coord_flip() +
scale_y_continuous(labels = label_abs) +
scale_fill_manual(values = c("mediumseagreen", "purple"), name="") +
theme_ggdist() +
theme(axis.text.x = element_text(angle=90, hjust = 1, vjust=0.5),
legend.position = "bottom",
panel.border = element_rect(colour = "grey78", fill=NA)) +
labs(x="", y="")
ggsave(here("figures/s3.png"), width=10)
Saving 10 x 4.51 in image
Not perfect, but resonably good. But ahhhhh… the age groups don’t align with the case notification age groups! Come back to think about this later.
Import the tuberculosis cases dataset
Overall, by year.
cases_by_year <- read_xlsx("2023-11-28_glasgow-acf.xlsx", sheet = "by_year")
cases_by_year%>%
mutate(across(where(is.numeric) & !(year), ~comma(.))) %>%
datatable()
#shift year to midpoint
cases_by_year <- cases_by_year %>%
mutate(year2 = year+0.5)
Plot the overall number of case notified per year, by pulmonary and extra pulmonary classification.
cases_by_year %>%
select(-total_notifications, -year) %>%
pivot_longer(cols = c(pulmonary_notifications, `non-pulmonary_notifications`)) %>%
mutate(name = case_when(name == "pulmonary_notifications" ~ "Pulmonary TB",
name == "non-pulmonary_notifications" ~ "Extra-pulmonary TB")) %>%
ggplot() +
geom_area(aes(y=value, x=year2, group = name, fill=name), alpha=0.5) +
geom_vline(aes(xintercept=acf_start), linetype=3) +
geom_vline(aes(xintercept=acf_end), linetype=3) +
scale_y_continuous(labels=comma) +
scale_x_continuous(labels = year_labels,
breaks = year_labels) +
scale_fill_brewer(palette = "Set1", name="") +
labs(
title = "Glasgow Corporation: Tuberculosis notifications",
subtitle = "1950 to 1963, by TB disease classification",
x = "Year",
y = "Number of cases",
caption = "Mass miniature X-ray campaign period between dashed lines (11th March-12th April 1957)"
) +
theme_ggdist() +
theme(legend.position = "bottom")
NA
NA
Read in the datasets and merge together.
#list all the sheets
all_sheets <- excel_sheets("2023-11-28_glasgow-acf.xlsx")
#get the ward sheets
ward_sheets <- enframe(all_sheets) %>%
filter(grepl("by_ward", value)) %>%
pull(value)
cases_by_ward_sex_year <- map_df(ward_sheets, ~read_xlsx(path = "2023-11-28_glasgow-acf.xlsx",
sheet = .))
cases_by_ward_sex_year %>%
mutate(across(where(is.numeric) & !(year), ~comma(.))) %>%
datatable()
NA
Aggregate together to get cases by division
cases_by_division <- cases_by_ward_sex_year %>%
left_join(ward_lookup) %>%
group_by(division, year, tb_type) %>%
summarise(cases = sum(cases, na.rm = TRUE))
Joining with `by = join_by(ward)``summarise()` has grouped output by 'division', 'year'. You can override using the `.groups` argument.
#shift year to midpoint
cases_by_division <- cases_by_division %>%
mutate(year2 = year+0.5) %>%
ungroup()
cases_by_division %>%
select(-year2) %>%
select(year, everything()) %>%
mutate(across(where(is.numeric) & !(year), ~comma(.))) %>%
datatable()
cases_by_division %>%
mutate(tb_type = case_when(tb_type == "Pulmonary" ~ "Pulmonary TB",
tb_type == "Non-Pulmonary" ~ "Extra-pulmonary TB")) %>%
ggplot() +
geom_area(aes(y=cases, x=year2, group = tb_type, fill=tb_type), alpha=0.5) +
geom_vline(aes(xintercept=acf_start), linetype=3) +
geom_vline(aes(xintercept=acf_end), linetype=3) +
scale_y_continuous(labels=comma) +
scale_x_continuous(labels = year_labels,
breaks = year_labels,
guide = guide_axis(angle = 90)) +
facet_wrap(division~., scales = "free_y") +
scale_fill_brewer(palette = "Set1", name="") +
labs(
title = "Glasgow Corporation: Tuberculosis notifications by Division",
subtitle = "1950 to 1963, by TB disease classification",
x = "Year",
y = "Number of cases",
caption = "Mass miniature X-ray campaign period between dashed lines (11th March-12th April 1957)\nNote: extra-pulmonary TB cases by Division/Ward not reported in 1962-1963"
) +
theme_ggdist() +
theme(legend.position = "bottom")
cases_by_ward <- cases_by_ward_sex_year %>%
group_by(ward, year, tb_type) %>%
summarise(cases = sum(cases, na.rm = TRUE)) %>%
ungroup()
`summarise()` has grouped output by 'ward', 'year'. You can override using the `.groups` argument.
cases_by_ward %>%
mutate(across(where(is.numeric) & !(year), ~comma(.))) %>%
select(year, everything()) %>%
datatable()
#shift year to midpoint
cases_by_ward <- cases_by_ward %>%
mutate(year2 = year+0.5)
cases_by_ward %>%
mutate(tb_type = case_when(tb_type == "Pulmonary" ~ "Pulmonary TB",
tb_type == "Non-Pulmonary" ~ "Extra-pulmonary TB")) %>%
ggplot() +
geom_area(aes(y=cases, x=year2, group = tb_type, fill=tb_type), alpha=0.8) +
geom_vline(aes(xintercept=acf_start), linetype=3) +
geom_vline(aes(xintercept=acf_end), linetype=3) +
scale_y_continuous(labels=comma) +
scale_x_continuous(labels = year_labels,
breaks = year_labels,
guide = guide_axis(angle = 90)) +
facet_wrap(ward~., scales = "free_y") +
scale_fill_brewer(palette = "Set1", name="") +
labs(
title = "Glasgow Corporation: Tuberculosis notifications by Ward",
subtitle = "1950 to 1963, by TB disease classification",
x = "Year",
y = "Number of cases",
caption = "Mass miniature X-ray campaign period between dashed lines (11th March-12th April 1957)\nNote: extra-pulmonary TB cases by Division/Ward not reported in 1962-1963"
) +
theme(legend.position = "bottom")
NA
NA
As we don’t have denominators, we will just model the change in counts.
#list all the sheets
all_sheets <- excel_sheets("2023-11-28_glasgow-acf.xlsx")
#get the ward sheets
age_sex_sheets <- enframe(all_sheets) %>%
filter(grepl("by_age_sex", value)) %>%
pull(value)
cases_by_age_sex <- map_df(age_sex_sheets, ~read_xlsx(path = "2023-11-28_glasgow-acf.xlsx",
sheet = .))
cases_by_age_sex %>%
mutate(across(where(is.numeric) & !(year), ~comma(.))) %>%
datatable()
NA
NA
Now calculate incidence per 100,000 population
Merge the notification and population denominator datasets together.
Here we need to include the whole population (with shipping and institutions) as they are included in the notifications.
overall_inc <- overall_pops %>%
left_join(cases_by_year)
Joining with `by = join_by(year, year2)`
overall_inc <- overall_inc %>%
mutate(inc_pulm_100k = pulmonary_notifications/total_population*100000,
inc_ep_100k = `non-pulmonary_notifications`/total_population*100000,
inc_100k = total_notifications/total_population*100000)
overall_inc %>%
select(year, inc_100k, inc_pulm_100k, inc_ep_100k) %>%
mutate_at(.vars = vars(inc_100k, inc_pulm_100k, inc_ep_100k),
.funs = funs(round)) %>%
datatable()
Warning: `funs()` was deprecated in dplyr 0.8.0.
Please use a list of either functions or lambdas:
# Simple named list:
list(mean = mean, median = median)
# Auto named with `tibble::lst()`:
tibble::lst(mean, median)
# Using lambdas
list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
overall_inc %>%
select(year2, inc_pulm_100k, inc_ep_100k) %>%
pivot_longer(cols = c(inc_pulm_100k, `inc_ep_100k`)) %>%
mutate(name = case_when(name == "inc_pulm_100k" ~ "Pulmonary TB",
name == "inc_ep_100k" ~ "Extra-pulmonary TB")) %>%
ggplot() +
geom_area(aes(y=value, x=year2, group = name, fill=name), alpha=0.5) +
geom_vline(aes(xintercept=acf_start), linetype=3) +
geom_vline(aes(xintercept=acf_end), linetype=3) +
scale_y_continuous(labels=comma) +
scale_x_continuous(labels = year_labels,
breaks = year_labels) +
scale_fill_brewer(palette = "Set1", name="") +
labs(
title = "Glasgow Corporation: Tuberculosis case notification rate",
subtitle = "1950 to 1963, by TB disease classification",
x = "Year",
y = "Case notification rate (per 100,000)",
caption = "Mass miniature X-ray campaign period between dashed lines (11th March-12th April 1957)"
) +
theme_ggdist() +
theme(legend.position = "bottom")
NA
NA
NA
division_inc <- division_pops %>%
left_join(cases_by_division)
Joining with `by = join_by(division, year)`
division_inc <- division_inc %>%
mutate(inc_100k = cases/total_population*100000)
division_inc %>%
select(year, division, tb_type, inc_100k) %>%
mutate_at(.vars = vars(inc_100k),
.funs = funs(round)) %>%
datatable()
Warning: `funs()` was deprecated in dplyr 0.8.0.
Please use a list of either functions or lambdas:
# Simple named list:
list(mean = mean, median = median)
# Auto named with `tibble::lst()`:
tibble::lst(mean, median)
# Using lambdas
list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
division_inc %>%
mutate(tb_type = case_when(tb_type == "Pulmonary" ~ "Pulmonary TB",
tb_type == "Non-Pulmonary" ~ "Extra-pulmonary TB")) %>%
ggplot() +
geom_area(aes(y=inc_100k, x=year2, group = tb_type, fill=tb_type), alpha=0.5) +
geom_vline(aes(xintercept=acf_start), linetype=3) +
geom_vline(aes(xintercept=acf_end), linetype=3) +
scale_y_continuous(labels=comma) +
scale_x_continuous(labels = year_labels,
breaks = year_labels,
guide = guide_axis(angle = 90)) +
facet_wrap(division~.) +
scale_fill_brewer(palette = "Set1", name="") +
labs(
title = "Glasgow Corporation: Tuberculosis case notification rate, by Division",
subtitle = "1950 to 1963, by TB disease classification",
x = "Year",
y = "Case notification rate (per 100,000)",
caption = "Mass miniature X-ray campaign period between dashed lines (11th March-12th April 1957)\nNote: extra-pulmonary TB cases by Division/Ward not reported in 1962-1963"
) +
theme_ggdist() +
theme(legend.position = "bottom")
NA
NA
NA
Here we will filter out the institutions and harbour from the denominators, as we don’t have reliable population denominators for them.
ward_inc <- ward_pops %>%
left_join(cases_by_ward)
Joining with `by = join_by(ward, year, year2)`
ward_inc <- ward_inc %>%
mutate(inc_100k = cases/population_without_inst_ship*100000)
ward_inc %>%
select(year, ward, tb_type, inc_100k) %>%
mutate_at(.vars = vars(inc_100k),
.funs = funs(round)) %>%
datatable()
Warning: `funs()` was deprecated in dplyr 0.8.0.
Please use a list of either functions or lambdas:
# Simple named list:
list(mean = mean, median = median)
# Auto named with `tibble::lst()`:
tibble::lst(mean, median)
# Using lambdas
list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
ward_inc %>%
mutate(tb_type = case_when(tb_type == "Pulmonary" ~ "Pulmonary TB",
tb_type == "Non-Pulmonary" ~ "Extra-pulmonary TB")) %>%
ggplot() +
geom_area(aes(y=inc_100k, x=year2, group = tb_type, fill=tb_type), alpha=0.5) +
geom_vline(aes(xintercept=acf_start), linetype=3) +
geom_vline(aes(xintercept=acf_end), linetype=3) +
scale_y_continuous(labels=comma) +
scale_x_continuous(labels = year_labels,
breaks = year_labels,
guide = guide_axis(angle = 90)) +
facet_wrap(ward~.) +
scale_fill_brewer(palette = "Set1", name="") +
labs(
title = "Glasgow Corporation: Tuberculosis case notification rate, by Ward",
subtitle = "1950 to 1963, by TB disease classification",
x = "Year",
y = "Incidence (per 100,000)",
caption = "Mass miniature X-ray campaign period between dashed lines (11th March-12th April 1957)\nNote: extra-pulmonary TB cases by Division/Ward not reported in 1962-1963"
) +
theme(legend.position = "bottom")
NA
NA
NA
NA
On a map
st_as_sf(left_join(ward_inc, glasgow_wards_1951)) %>%
filter(tb_type=="Pulmonary") %>%
ggplot() +
geom_sf(aes(fill=inc_100k)) +
facet_wrap(year~., ncol = 7) +
scale_fill_viridis_c(name="Case notification rate (per 100,000)",
option = "A") +
theme_ggdist() +
theme(legend.position = "top",
legend.key.width = unit(2, "cm"),
panel.border = element_rect(colour = "grey78", fill=NA)) +
guides(fill=guide_colorbar(title.position = "top"))
Joining with `by = join_by(division, ward, ward_number)`
Import the TB mortality data.
First, overall deaths. Note that in the original reports, we have a pulmonary TB death rate per million for all years, and numbers of pulmonary TB deaths for each year apart from 1950.
#get the overall mortality sheets
deaths_sheets <- enframe(all_sheets) %>%
filter(grepl("deaths", value)) %>%
pull(value)
overall_deaths <- map_df(deaths_sheets, ~read_xlsx(path = "2023-11-28_glasgow-acf.xlsx",
sheet = .))
overall_deaths %>%
mutate(across(where(is.numeric) & !(year), ~comma(.))) %>%
datatable()
NA
NA
NA
Plot the raw numbers of pulmonary deaths
overall_deaths %>%
ggplot(aes(x=year, y=pulmonary_deaths)) +
geom_line(colour = "#DE0D92") +
geom_point(colour = "#DE0D92") +
geom_vline(aes(xintercept=acf_start), linetype=3) +
geom_vline(aes(xintercept=acf_end), linetype=3) +
labs(y="Pulmonary TB deaths per year",
x = "Year",
title = "Numbers of pulmonary TB deaths",
subtitle = "Glasgow, 1950-1963",
caption = "Mass miniature X-ray campaign period between dashed lines (11th March-12th April 1957)\nNote: no data for 1950") +
theme_ggdist() +
theme(panel.border = element_rect(colour = "grey78", fill=NA))
NA
NA
Now the incidence of pulmonary TB death
overall_deaths %>%
ggplot(aes(x=year, y=pulmonary_death_rate_per_100k)) +
geom_line(colour = "#4D6CFA") +
geom_point(colour = "#4D6CFA") +
geom_vline(aes(xintercept=acf_start), linetype=3) +
geom_vline(aes(xintercept=acf_end), linetype=3) +
scale_y_continuous(labels=comma) +
scale_x_continuous(labels = year_labels,
breaks = year_labels) +
labs(y="Annual incidence of death (per 100,000)",
x = "Year",
caption = "Mass miniature X-ray campaign period between dashed lines (11th March-12th April 1957)") +
theme_ggdist() +
theme(panel.border = element_rect(colour = "grey78", fill=NA))
ggsave(here("figures/s7.png"), width=10)
Saving 10 x 4.51 in image
Make Table 1 here, and save for publication.
overall_pops %>%
select(year, total_population) %>%
left_join(overall_inc %>%
select(year,
pulmonary_notifications, inc_pulm_100k,
`non-pulmonary_notifications`, inc_ep_100k,
total_notifications, inc_100k)) %>%
left_join(overall_deaths %>%
select(year,
pulmonary_deaths, pulmonary_death_rate_per_100k)) %>%
mutate(across(where(is.numeric) & !(year), ~round(., digits=1))) %>%
mutate(across(where(is.numeric) & !(year), ~comma(.)))
Joining with `by = join_by(year)`Joining with `by = join_by(year)`
First model will investigate the impact of mass miniature X-ray campaign on pulmonary TB case notification rate using an interrupted time series analysis.
Set up the data
mdata1 <- overall_inc %>%
mutate(acf_period = case_when(year %in% c(1950:1956) ~ "a. pre-acf",
year %in% c(1957) ~ "b. acf",
year %in% c(1958:1963) ~ "c. post-acf")) %>%
mutate(y_num = 1:nrow(.)) %>%
rename(extrapulmonary_notifications = `non-pulmonary_notifications`)
Work on the priors a bit
Basic prior
basic_prior <- c(prior(normal(0, 1), class = Intercept),
prior(normal(0, 0.25), class = b))
Look at the mean and variance of counts (counts of pulmonary notifications are what we are predicting)
#Mean of counts per year
mean(mdata1$pulmonary_notifications)
[1] 1858.429
#variance of counts per year
var(mdata1$pulmonary_notifications)
[1] 716579.8
Quite a bit of over-dispersion here, so negative binomial distribution might be a better choice of distributional family than Poisson.
Slightly more informative prior (“weakly informative” really)
ggplot(data = tibble(x = seq(from = 500, to = 5000, by = 10)),
aes(x = x, y = dgamma(x, shape = 2, rate = 0.001))) +
geom_area(color = "transparent",
fill = "#DE0D92") +
scale_x_continuous(NULL) +
coord_cartesian(xlim = c(500, 5000)) +
ggtitle(expression(brms~~gamma(2*", "*0.001)~shape~prior))
NA
NA
Fit a model with only priors
winform_prior <- c(prior(normal(0, 1), class = Intercept),
prior(gamma(2, 0.001), class = shape),
prior(normal(0, 0.25), class = b))
m_pulmonary_prior <- brm(
pulmonary_notifications ~ y_num + acf_period + acf_period:y_num + offset(log(total_population)),
data = mdata1,
family = negbinomial(),
seed = 1234,
chains = 4, cores = 4,
prior = winform_prior,
sample_prior = "only",
backend="cmdstanr",
save_pars = save_pars(all = TRUE),
warmup = 1000)
Compiling Stan program...
-
\
|
/
-
\
|
/
-
\
|
/
-
\
|
/
-
\
|
/
-
\
|
/
-
\
|
/
-
\
|
/
-
\
|
/
-
In file included from /var/folders/bh/wr0g5x9j2wq46p9hm3ft_b380000gn/T/RtmpT9voWX/model-496f7bc77ead.hpp:1:
In file included from /Users/petermacpherson/.cmdstan/cmdstan-2.32.2/stan/src/stan/model/model_header.hpp:4:
In file included from /Users/petermacpherson/.cmdstan/cmdstan-2.32.2/stan/lib/stan_math/stan/math.hpp:19:
In file included from /Users/petermacpherson/.cmdstan/cmdstan-2.32.2/stan/lib/stan_math/stan/math/rev.hpp:10:
In file included from /Users/petermacpherson/.cmdstan/cmdstan-2.32.2/stan/lib/stan_math/stan/math/rev/fun.hpp:198:
In file included from /Users/petermacpherson/.cmdstan/cmdstan-2.32.2/stan/lib/stan_math/stan/math/prim/functor.hpp:15:
In file included from /Users/petermacpherson/.cmdstan/cmdstan-2.32.2/stan/lib/stan_math/stan/math/prim/functor/integrate_ode_rk45.hpp:6:
In file included from /Users/petermacpherson/.cmdstan/cmdstan-2.32.2/stan/lib/stan_math/stan/math/prim/functor/ode_rk45.hpp:9:
In file included from /Users/petermacpherson/.cmdstan/cmdstan-2.32.2/stan/lib/stan_math/lib/boost_1.78.0/boost/numeric/odeint.hpp:76:
In file included from /Users/petermacpherson/.cmdstan/cmdstan-2.32.2/stan/lib/stan_math/lib/boost_1.78.0/boost/numeric/odeint/integrate/observer_collection.hpp:23:
In file included from /Users/petermacpherson/.cmdstan/cmdstan-2.32.2/stan/lib/stan_math/lib/boost_1.78.0/boost/function.hpp:30:
In file included from /Users/petermacpherson/.cmdstan/cmdstan-2.32.2/stan/lib/stan_math/lib/boost_1.78.0/boost/function/detail/prologue.hpp:17:
In file included from /Users/petermacpherson/.cmdstan/cmdstan-2.32.2/stan/lib/stan_math/lib/boost_1.78.0/boost/function/function_base.hpp:21:
In file included from /Users/petermacpherson/.cmdstan/cmdstan-2.32.2/stan/lib/stan_math/lib/boost_1.78.0/boost/type_index.hpp:29:
In file included from /Users/petermacpherson/.cmdstan/cmdstan-2.32.2/stan/lib/stan_math/lib/boost_1.78.0/boost/type_index/stl_type_index.hpp:47:
/Users/petermacpherson/.cmdstan/cmdstan-2.32.2/stan/lib/stan_math/lib/boost_1.78.0
\
/boost/container_hash/hash.hpp:132:33: warning: 'unary_function<const std::error_category *, unsigned long>' is deprecated [-Wdeprecated-declarations]
|
struct hash_base : std::unary_function<T, std::size_t> {};
^
/Users/petermacpherson/.cmdstan/cmdstan-2.32.2/stan/lib/stan_math/lib/boost_1.78.0/boost/container_hash/hash.hpp:692:18: note: in instantiation of template class 'boost::hash_detail::hash_base<const std::error_category *>' requested here
: public boost::hash_detail::hash_base<T*>
^
/Users/petermacpherson/.cmdstan/cmdstan-2.32.2/stan/lib/stan_math/lib/boost_1.78.0/boost/container_hash/hash.hpp:420:24: note: in instantiation of template class 'boost::hash<const std::error_category *>' requested here
boost::hash<T> hasher;
^
/Users/petermacpherson/.cmdstan/cmdstan-2.32.2/stan/lib/stan_math/lib/boost_1.78.0/boost/container_hash/hash.hpp:551:9: note: in instantiation of function template specialization 'boost::hash_combine<const std::error_category *>' requested here
hash_combine(seed, &v.category());
^
/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/include/c++/v1/__functional/unary_function.h:23:29: note: 'unary_function<const std::error_category *, unsigned long>' has been explicitly marked deprecated here
struct _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX11 unary_function
^
/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/include/c++/v1/__config:850:41: note: expanded from macro '_LIBCPP_DEPRECATED_IN_CXX11'
# define _LIBCPP_DEPRECATED_IN_CXX11 _LIBCPP_DEPRECATED
^
/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/include/c++/v1/__config:835:49: note: expanded from macro '_LIBCPP_DEPRECATED'
# define _LIBCPP_DEPRECATED __attribute__((deprecated))
^
/
-
\
|
/
-
\
|
/
-
\
|
/
-
\
|
/
-
\
|
/
-
\
|
/
-
\
|
/
-
\
|
/
-
\
|
/
-
\
1 warning generated.
|
/
-
ld: warning: duplicate -rpath '/Users/petermacpherson/.cmdstan/cmdstan-2.32.2/stan/lib/stan_math/lib/tbb' ignored
\
|
/
-
Start sampling
Running MCMC with 4 parallel chains...
Chain 1 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 1 Iteration: 100 / 2000 [ 5%] (Warmup)
Chain 1 Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 1 Iteration: 300 / 2000 [ 15%] (Warmup)
Chain 1 Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 1 Iteration: 500 / 2000 [ 25%] (Warmup)
Chain 1 Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 1 Iteration: 700 / 2000 [ 35%] (Warmup)
Chain 1 Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 1 Iteration: 900 / 2000 [ 45%] (Warmup)
Chain 1 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 1 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 1 Iteration: 1100 / 2000 [ 55%] (Sampling)
Chain 1 Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 1 Iteration: 1300 / 2000 [ 65%] (Sampling)
Chain 1 Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 1 Iteration: 1500 / 2000 [ 75%] (Sampling)
Chain 1 Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 1 Iteration: 1700 / 2000 [ 85%] (Sampling)
Chain 1 Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 1 Iteration: 1900 / 2000 [ 95%] (Sampling)
Chain 1 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 2 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 2 Iteration: 100 / 2000 [ 5%] (Warmup)
Chain 2 Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 2 Iteration: 300 / 2000 [ 15%] (Warmup)
Chain 2 Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 2 Iteration: 500 / 2000 [ 25%] (Warmup)
Chain 2 Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 2 Iteration: 700 / 2000 [ 35%] (Warmup)
Chain 2 Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 2 Iteration: 900 / 2000 [ 45%] (Warmup)
Chain 2 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 2 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 2 Iteration: 1100 / 2000 [ 55%] (Sampling)
Chain 2 Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 2 Iteration: 1300 / 2000 [ 65%] (Sampling)
Chain 2 Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 2 Iteration: 1500 / 2000 [ 75%] (Sampling)
Chain 2 Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 2 Iteration: 1700 / 2000 [ 85%] (Sampling)
Chain 2 Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 2 Iteration: 1900 / 2000 [ 95%] (Sampling)
Chain 2 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 3 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 3 Iteration: 100 / 2000 [ 5%] (Warmup)
Chain 3 Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 3 Iteration: 300 / 2000 [ 15%] (Warmup)
Chain 3 Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 3 Iteration: 500 / 2000 [ 25%] (Warmup)
Chain 3 Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 3 Iteration: 700 / 2000 [ 35%] (Warmup)
Chain 3 Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 3 Iteration: 900 / 2000 [ 45%] (Warmup)
Chain 3 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 3 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 3 Iteration: 1100 / 2000 [ 55%] (Sampling)
Chain 3 Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 3 Iteration: 1300 / 2000 [ 65%] (Sampling)
Chain 3 Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 3 Iteration: 1500 / 2000 [ 75%] (Sampling)
Chain 3 Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 3 Iteration: 1700 / 2000 [ 85%] (Sampling)
Chain 3 Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 3 Iteration: 1900 / 2000 [ 95%] (Sampling)
Chain 3 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 4 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 4 Iteration: 100 / 2000 [ 5%] (Warmup)
Chain 4 Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 4 Iteration: 300 / 2000 [ 15%] (Warmup)
Chain 4 Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 4 Iteration: 500 / 2000 [ 25%] (Warmup)
Chain 4 Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 4 Iteration: 700 / 2000 [ 35%] (Warmup)
Chain 4 Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 4 Iteration: 900 / 2000 [ 45%] (Warmup)
Chain 4 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 4 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 4 Iteration: 1100 / 2000 [ 55%] (Sampling)
Chain 4 Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 4 Iteration: 1300 / 2000 [ 65%] (Sampling)
Chain 4 Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 4 Iteration: 1500 / 2000 [ 75%] (Sampling)
Chain 4 Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 4 Iteration: 1700 / 2000 [ 85%] (Sampling)
Chain 4 Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 4 Iteration: 1900 / 2000 [ 95%] (Sampling)
Chain 4 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 1 finished in 0.0 seconds.
Chain 2 finished in 0.0 seconds.
Chain 3 finished in 0.0 seconds.
Chain 4 finished in 0.0 seconds.
All 4 chains finished successfully.
Mean chain execution time: 0.0 seconds.
Total execution time: 0.6 seconds.
summary(m_pulmonary_prior)
Family: negbinomial
Links: mu = log; shape = identity
Formula: pulmonary_notifications ~ y_num + acf_period + acf_period:y_num + offset(log(total_population))
Data: mdata1 (Number of observations: 14)
Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
total post-warmup draws = 4000
Population-Level Effects:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept -0.02 2.43 -4.70 4.73 1.00 6429 2696
y_num -0.00 0.25 -0.50 0.49 1.00 6724 2706
acf_periodb.acf -0.00 0.25 -0.48 0.47 1.00 6068 2644
acf_periodc.postMacf -0.00 0.25 -0.49 0.49 1.00 7114 3186
y_num:acf_periodb.acf -0.00 0.25 -0.51 0.49 1.00 7021 2656
y_num:acf_periodc.postMacf 0.00 0.24 -0.47 0.48 1.00 6589 3042
Family Specific Parameters:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
shape 2001.26 1412.00 228.42 5527.71 1.00 4799 2525
Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
conditional_effects(m_pulmonary_prior)
Now fit the model with the weakly informative priors
m_pulmonary_overall <- brm(
pulmonary_notifications ~ y_num + acf_period + acf_period:y_num + offset(log(total_population)),
data = mdata1,
family = negbinomial(),
seed = 1234,
chains = 4, cores = 4,
prior = winform_prior,
save_pars = save_pars(all = TRUE),
backend = "cmdstanr",
warmup = 1000)
Compiling Stan program...
-
\
|
/
-
\
|
/
-
\
|
/
-
\
|
/
-
\
|
/
-
In file included from /var/folders/bh/wr0g5x9j2wq46p9hm3ft_b380000gn/T/RtmpT9voWX/model-496f147e2ab1.hpp:1:
In file included from /Users/petermacpherson/.cmdstan/cmdstan-2.32.2/stan/src/stan/model/model_header.hpp:4:
In file included from /Users/petermacpherson/.cmdstan/cmdstan-2.32.2/stan/lib/stan_math/stan/math.hpp:19:
In file included from /Users/petermacpherson/.cmdstan/cmdstan-2.32.2/stan/lib/stan_math/stan/math/rev.hpp:10:
In file included from /Users/petermacpherson/.cmdstan/cmdstan-2.32.2/stan/lib/stan_math/stan/math/rev/fun.hpp:198:
In file included from /Users/petermacpherson/.cmdstan/cmdstan-2.32.2/stan/lib/stan_math/stan/math/prim/functor.hpp:15:
In file included from /Users/petermacpherson/.cmdstan/cmdstan-2.32.2/stan/lib/stan_math/stan/math/prim/functor/integrate_ode_rk45.hpp:6:
In file included from /Users/petermacpherson/.cmdstan/cmdstan-2.32.2/stan/lib/stan_math/stan/math/prim/functor/ode_rk45.hpp:9:
In file included from /Users/petermacpherson/.cmdstan/cmdstan-2.32.2/stan/lib/stan_math/lib/boost_1.78.0/boost/numeric/odeint.hpp:76:
In file included from /Users/petermacpherson/.cmdstan/cmdstan-2.32.2/stan/lib/stan_math/lib/boost_1.78.0/boost/numeric/odeint/integrate/observer_collection.hpp:23:
In file included from /Users/petermacpherson/.cmdstan/cmdstan-2.32.2/stan/lib/stan_math/lib/boost_1.78.0/boost/function.hpp:30:
In file included from /Users/petermacpherson/.cmdstan/cmdstan-2.32.2/stan/lib/stan_math/lib/boost_1.78.0/boost/function/detail/prologue.hpp:17:
In file included from /Users/petermacpherson/.cmdstan/cmdstan-2.32.2/stan/lib/stan_math/lib/boost_1.78.0/boost/function/function_base.hpp:21:
In file included from /Users/petermacpherson/.cmdstan/cmdstan-2.32.2/stan/lib/stan_math/lib/boost_1.78.0/boost/type_index.hpp:29:
In file included from /Users/petermacpherson/.cmdstan/cmdstan-2.32.2/stan/lib/stan_math/lib/boost_1.78.0/boost/type_index/stl_type_index.hpp:47:
/Users/petermacpherson/.cmdstan/cmdstan-2.32.2/stan/lib/stan_math/lib/boost_1.78.0
\
/boost/container_hash/hash.hpp:132:33: warning: 'unary_function<const std::error_category *, unsigned long>' is deprecated [-Wdeprecated-declarations]
struct hash_base : std::unary_function<T, std::size_t> {};
^
/Users/petermacpherson/.cmdstan/cmdstan-2.32.2/stan/lib/stan_math/lib/boost_1.78.0/boost/container_hash/hash.hpp:692:18: note: in instantiation of template class 'boost::hash_detail::hash_base<const std::error_category *>' requested here
: public boost::hash_detail::hash_base<T*>
^
/Users/petermacpherson/.cmdstan/cmdstan-2.32.2/stan/lib/stan_math/lib/boost_1.78.0/boost/container_hash/hash.hpp:420:24: note: in instantiation of template class 'boost::hash<const std::error_category *>' requested here
boost::hash<T> hasher;
^
/Users/petermacpherson/.cmdstan/cmdstan-2.32.2/stan/lib/stan_math/lib/boost_1.78.0/boost/container_hash/hash.hpp:551:9: note: in instantiation of function template specialization 'boost::hash_combine<const std::error_category *>' requested here
hash_combine(seed, &v.category());
^
/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/include/c++/v1/__functional/unary_function.h:23:29: note: 'unary_function<const std::error_category *, unsigned long>' has been explicitly marked deprecated here
struct _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX11 unary_function
^
/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/include/c++/v1/__config:850:41: note: expanded from macro '_LIBCPP_DEPRECATED_IN_CXX11'
# define _LIBCPP_DEPRECATED_IN_CXX11 _LIBCPP_DEPRECATED
^
/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/include/c++/v1/__config:835:49: note: expanded from macro '_LIBCPP_DEPRECATED'
|
# define _LIBCPP_DEPRECATED __attribute__((deprecated))
^
/
-
\
|
/
-
\
|
/
-
\
|
/
-
\
|
/
-
\
|
/
-
\
|
/
-
\
|
/
-
\
|
/
-
1 warning generated.
\
|
ld: warning: duplicate -rpath '/Users/petermacpherson/.cmdstan/cmdstan-2.32.2/stan/lib/stan_math/lib/tbb' ignored
/
-
\
Start sampling
Running MCMC with 4 parallel chains...
Chain 1 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 1 Iteration: 100 / 2000 [ 5%] (Warmup)
Chain 1 Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 1 Iteration: 300 / 2000 [ 15%] (Warmup)
Chain 1 Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 1 Iteration: 500 / 2000 [ 25%] (Warmup)
Chain 1 Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 1 Iteration: 700 / 2000 [ 35%] (Warmup)
Chain 1 Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 1 Iteration: 900 / 2000 [ 45%] (Warmup)
Chain 1 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 1 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 1 Iteration: 1100 / 2000 [ 55%] (Sampling)
Chain 1 Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 2 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 2 Iteration: 100 / 2000 [ 5%] (Warmup)
Chain 2 Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 2 Iteration: 300 / 2000 [ 15%] (Warmup)
Chain 2 Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 2 Iteration: 500 / 2000 [ 25%] (Warmup)
Chain 2 Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 2 Iteration: 700 / 2000 [ 35%] (Warmup)
Chain 2 Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 2 Iteration: 900 / 2000 [ 45%] (Warmup)
Chain 2 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 2 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 2 Iteration: 1100 / 2000 [ 55%] (Sampling)
Chain 3 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 3 Iteration: 100 / 2000 [ 5%] (Warmup)
Chain 3 Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 3 Iteration: 300 / 2000 [ 15%] (Warmup)
Chain 3 Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 3 Iteration: 500 / 2000 [ 25%] (Warmup)
Chain 3 Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 3 Iteration: 700 / 2000 [ 35%] (Warmup)
Chain 4 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 4 Iteration: 100 / 2000 [ 5%] (Warmup)
Chain 4 Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 4 Iteration: 300 / 2000 [ 15%] (Warmup)
Chain 4 Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 1 Iteration: 1300 / 2000 [ 65%] (Sampling)
Chain 1 Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 1 Iteration: 1500 / 2000 [ 75%] (Sampling)
Chain 1 Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 1 Iteration: 1700 / 2000 [ 85%] (Sampling)
Chain 1 Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 2 Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 2 Iteration: 1300 / 2000 [ 65%] (Sampling)
Chain 2 Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 2 Iteration: 1500 / 2000 [ 75%] (Sampling)
Chain 2 Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 2 Iteration: 1700 / 2000 [ 85%] (Sampling)
Chain 3 Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 3 Iteration: 900 / 2000 [ 45%] (Warmup)
Chain 3 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 3 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 3 Iteration: 1100 / 2000 [ 55%] (Sampling)
Chain 3 Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 3 Iteration: 1300 / 2000 [ 65%] (Sampling)
Chain 3 Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 4 Iteration: 500 / 2000 [ 25%] (Warmup)
Chain 4 Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 4 Iteration: 700 / 2000 [ 35%] (Warmup)
Chain 4 Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 4 Iteration: 900 / 2000 [ 45%] (Warmup)
Chain 4 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 4 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 4 Iteration: 1100 / 2000 [ 55%] (Sampling)
Chain 1 Iteration: 1900 / 2000 [ 95%] (Sampling)
Chain 1 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 2 Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 2 Iteration: 1900 / 2000 [ 95%] (Sampling)
Chain 2 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 3 Iteration: 1500 / 2000 [ 75%] (Sampling)
Chain 3 Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 3 Iteration: 1700 / 2000 [ 85%] (Sampling)
Chain 3 Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 3 Iteration: 1900 / 2000 [ 95%] (Sampling)
Chain 4 Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 4 Iteration: 1300 / 2000 [ 65%] (Sampling)
Chain 4 Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 4 Iteration: 1500 / 2000 [ 75%] (Sampling)
Chain 4 Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 1 finished in 0.5 seconds.
Chain 2 finished in 0.4 seconds.
Chain 3 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 4 Iteration: 1700 / 2000 [ 85%] (Sampling)
Chain 4 Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 4 Iteration: 1900 / 2000 [ 95%] (Sampling)
Chain 4 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 3 finished in 0.5 seconds.
Chain 4 finished in 0.5 seconds.
All 4 chains finished successfully.
Mean chain execution time: 0.5 seconds.
Total execution time: 0.8 seconds.
summary(m_pulmonary_overall)
Family: negbinomial
Links: mu = log; shape = identity
Formula: pulmonary_notifications ~ y_num + acf_period + acf_period:y_num + offset(log(total_population))
Data: mdata1 (Number of observations: 14)
Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
total post-warmup draws = 4000
Population-Level Effects:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept -6.10 0.03 -6.16 -6.04 1.00 2903 2347
y_num -0.02 0.01 -0.03 -0.01 1.00 2817 2512
acf_periodb.acf 0.02 0.26 -0.48 0.51 1.00 2262 2285
acf_periodc.postMacf 0.05 0.11 -0.17 0.26 1.00 2539 2254
y_num:acf_periodb.acf 0.08 0.03 0.02 0.14 1.00 2254 2168
y_num:acf_periodc.postMacf -0.05 0.01 -0.07 -0.03 1.00 2312 2208
Family Specific Parameters:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
shape 1803.98 1075.70 456.89 4516.48 1.00 2082 2079
Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
plot(m_pulmonary_overall)
pp_check(m_pulmonary_overall, type='ecdf_overlay')
Using 10 posterior draws for ppc type 'ecdf_overlay' by default.
Summarise the posterior
f1b <- plot_counterfactual(model_data = mdata1, model = m_pulmonary_overall, population_denominator = total_population, outcome = inc_pulm_100k, grouping_var=NULL)
f1b
Make this into a figure
f1a <- st_as_sf(left_join(ward_inc, glasgow_wards_1951)) %>%
filter(tb_type=="Pulmonary") %>%
ggplot() +
geom_sf(aes(fill=inc_100k)) +
facet_wrap(year~., ncol = 7) +
scale_fill_viridis_c(name="Case notification rate (per 100,000)",
option = "A") +
theme_ggdist() +
theme(panel.border = element_rect(colour = "grey78", fill=NA),
legend.position = "top",
#legend.key.width = unit(2, "cm"),
legend.title.align = 0.5) +
guides(fill=guide_colorbar(title.position = "top"))
Joining with `by = join_by(division, ward, ward_number)`
(f1a / f1b) + plot_annotation(tag_levels = "A")
ggsave(here("figures/f1.png"))
Saving 7.29 x 4.51 in image
Summary of change in notifications
summarise_change(model_data=mdata1, model=m_pulmonary_overall, population_denominator=total_population, grouping_var=NULL) %>%
map(datatable)
Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in dplyr 1.1.0.
Please use `reframe()` instead.
When switching from `summarise()` to `reframe()`, remember that `reframe()` always returns an ungrouped data frame and adjust accordingly.
$immediate_change
$post_change
$slope_change
NA
(Alternative way - keep in for now)
overall_immediate_draws <- mdata1 %>%
select(year, year2, y_num, acf_period, total_population, pulmonary_notifications) %>%
filter(year %in% c(1956,1957)) %>%
add_epred_draws(m_pulmonary_overall) %>%
mutate(inc_100k = .epred/total_population*100000) %>%
group_by(.draw) %>%
summarise(pct_change_immediate = (last(inc_100k) - first(inc_100k))/first(inc_100k)) %>%
ungroup()
overall_post_draws <- mdata1 %>%
select(year, year2, y_num, acf_period, total_population, pulmonary_notifications) %>%
filter(year %in% c(1956,1958)) %>%
add_epred_draws(m_pulmonary_overall) %>%
mutate(inc_100k = .epred/total_population*100000) %>%
group_by(.draw) %>%
summarise(pct_change_post = (last(inc_100k) - first(inc_100k))/first(inc_100k)) %>%
ungroup()
overall_slope_draws <- mdata1 %>%
select(year, year2, y_num, acf_period, total_population, pulmonary_notifications) %>%
filter(year %in% c(1950, 1956, 1958, 1963)) %>%
add_epred_draws(m_pulmonary_overall) %>%
mutate(inc_100k = .epred/total_population*100000) %>%
ungroup() %>%
mutate(n_years = length(year), .by=acf_period) %>%
group_by(acf_period, .draw) %>%
summarise(pct_change_slope = ((last(inc_100k) - first(inc_100k))/first(inc_100k))/n_years) %>%
distinct() %>%
pivot_wider(names_from = c(acf_period),
values_from = pct_change_slope) %>%
mutate(ratio_annual_slope = `c. post-acf` / `a. pre-acf`)
Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in dplyr 1.1.0.
Please use `reframe()` instead.
When switching from `summarise()` to `reframe()`, remember that `reframe()` always returns an ungrouped data frame and adjust accordingly.`summarise()` has grouped output by 'acf_period', '.draw'. You can override using the `.groups` argument.
Correlation between immediate effect and post effect of ACF
left_join(overall_immediate_draws, overall_post_draws) %>%
ggplot(aes(x=pct_change_immediate, y=pct_change_post)) +
geom_hdr(
aes(fill = after_stat(probs)),
alpha = 1) +
#geom_hdr_points(aes(colour = after_stat(probs)), size=0.5) +
geom_smooth(method = "lm", se=FALSE, colour="black", linewidth=0.5) +
stat_regline_equation(label.x = 0.25, label.y = -0.25, size=4) +
scale_x_continuous(labels = percent,
breaks = pretty_breaks(n = 10)) +
scale_y_continuous(labels = percent,
breaks = pretty_breaks(n = 5)) +
scale_fill_viridis_d(option="E", name="") +
labs(title="Correlation between immediate ACF impact and post-ACF case notification rate",
y="Post intervention impact: ercentage change in CNR (1958 vs. 1956)",
x="Immediate impact: percentage change in CNR (1957 vs. 1956)",
caption="Boundaries are posterior desnity intervals from 4000 draws") +
theme_ggdist() +
theme(legend.position = "bottom",
panel.border = element_rect(colour = "grey78", fill=NA))
Joining with `by = join_by(.draw)`
Correlation between immediate effect and change in slope
left_join(overall_immediate_draws, overall_slope_draws) %>%
ggplot(aes(x=pct_change_immediate, y=ratio_annual_slope)) +
geom_hdr(
aes(fill = after_stat(probs)),
alpha = 1) +
#geom_hdr_points(aes(colour = after_stat(probs)), size=0.5) +
geom_smooth(method = "lm", se=FALSE, colour="black", linewidth=0.5) +
#stat_regline_equation(label.x = 0.25, label.y = 0.02, size=4) +
scale_x_continuous(labels = percent,
breaks = pretty_breaks(n = 10)) +
scale_y_continuous(breaks = pretty_breaks(n = 5),
limits = c(0, 10)) +
scale_fill_viridis_d(option="E", name="") +
labs(title="Correlation between immediate ACF impact and post-ACF case notification rate",
y="Post intervention impact: Percentage change in CNR (1958 vs. 1956)",
x="Immediate impact: percentage change in CNR (1957 vs. 1956)",
caption="Points are draws from posteior distribution") +
theme_ggdist() +
theme(legend.position = "bottom",
panel.border = element_rect(colour = "grey78", fill=NA))
Joining with `by = join_by(.draw)`
overall_pulmonary_counterf <- calcuate_counterfactual(model_data = mdata1, model=m_pulmonary_overall, population_denominator = total_population)
Joining with `by = join_by(year, total_population, .draw)`Joining with `by = join_by(.draw)`
overall_pulmonary_counterf$counter_post %>%
mutate(across(c(cases_averted:cases_averted.upper, diff_inc100k:diff_inc100k.upper), number_format(accuracy = 0.1, big.mark = ","))) %>%
mutate(across(c(rr_inc100k:rr_inc100k.upper), number_format(accuracy = 0.01))) %>%
mutate(across(c(pct_change:pct_change.upper), percent, accuracy=0.1)) %>%
datatable()
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `across(c(pct_change:pct_change.upper), percent, accuracy = 0.1)`.
Caused by warning:
! The `...` argument of `across()` is deprecated as of dplyr 1.1.0.
Supply arguments directly to `.fns` through an anonymous function instead.
# Previously
across(a:b, mean, na.rm = TRUE)
# Now
across(a:b, \(x) mean(x, na.rm = TRUE))
This warning is displayed once every 8 hours.
Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
Total pulmonary TB cases averted between 1958 and 1963
overall_pulmonary_counterf$counter_post_overall %>%
mutate(across(c(cases_averted:cases_averted.upper), number_format(accuracy = 0.1, big.mark = ","))) %>%
mutate(across(c(pct_change:pct_change.upper), percent, accuracy=0.1)) %>%
datatable()
NA
NA
m_extrap_overall <- brm(
extrapulmonary_notifications ~ y_num + acf_period + acf_period:y_num + offset(log(total_population)),
data = mdata1,
family = poisson(),
seed = 1234,
chains = 4, cores = 4,
prior = basic_prior,
save_pars = save_pars(all = TRUE),
backend = "cmdstanr",
warmup = 1000)
-
\
|
/
-
\
|
/
-
\
|
/
-
\
|
/
-
\
|
/
-
\
|
/
-
\
|
/
-
\
|
/
-
\
|
/
-
\
|
/
-
\
|
/
-
\
|
/
-
\
|
/
-
\
|
/
-
\
Running MCMC with 4 parallel chains...
Chain 1 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 1 Iteration: 100 / 2000 [ 5%] (Warmup)
Chain 1 Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 1 Iteration: 300 / 2000 [ 15%] (Warmup)
Chain 1 Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 1 Iteration: 500 / 2000 [ 25%] (Warmup)
Chain 1 Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 1 Iteration: 700 / 2000 [ 35%] (Warmup)
Chain 1 Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 1 Iteration: 900 / 2000 [ 45%] (Warmup)
Chain 1 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 1 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 1 Iteration: 1100 / 2000 [ 55%] (Sampling)
Chain 1 Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 1 Iteration: 1300 / 2000 [ 65%] (Sampling)
Chain 1 Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 1 Iteration: 1500 / 2000 [ 75%] (Sampling)
Chain 1 Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 1 Iteration: 1700 / 2000 [ 85%] (Sampling)
Chain 1 Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 1 Iteration: 1900 / 2000 [ 95%] (Sampling)
Chain 1 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 2 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 2 Iteration: 100 / 2000 [ 5%] (Warmup)
Chain 2 Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 2 Iteration: 300 / 2000 [ 15%] (Warmup)
Chain 2 Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 2 Iteration: 500 / 2000 [ 25%] (Warmup)
Chain 2 Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 2 Iteration: 700 / 2000 [ 35%] (Warmup)
Chain 2 Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 2 Iteration: 900 / 2000 [ 45%] (Warmup)
Chain 2 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 2 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 2 Iteration: 1100 / 2000 [ 55%] (Sampling)
Chain 2 Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 2 Iteration: 1300 / 2000 [ 65%] (Sampling)
Chain 2 Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 2 Iteration: 1500 / 2000 [ 75%] (Sampling)
Chain 2 Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 2 Iteration: 1700 / 2000 [ 85%] (Sampling)
Chain 2 Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 2 Iteration: 1900 / 2000 [ 95%] (Sampling)
Chain 2 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 3 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 3 Iteration: 100 / 2000 [ 5%] (Warmup)
Chain 3 Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 3 Iteration: 300 / 2000 [ 15%] (Warmup)
Chain 3 Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 3 Iteration: 500 / 2000 [ 25%] (Warmup)
Chain 3 Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 3 Iteration: 700 / 2000 [ 35%] (Warmup)
Chain 3 Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 3 Iteration: 900 / 2000 [ 45%] (Warmup)
Chain 3 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 3 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 3 Iteration: 1100 / 2000 [ 55%] (Sampling)
Chain 3 Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 3 Iteration: 1300 / 2000 [ 65%] (Sampling)
Chain 3 Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 3 Iteration: 1500 / 2000 [ 75%] (Sampling)
Chain 3 Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 3 Iteration: 1700 / 2000 [ 85%] (Sampling)
Chain 4 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 4 Iteration: 100 / 2000 [ 5%] (Warmup)
Chain 4 Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 4 Iteration: 300 / 2000 [ 15%] (Warmup)
Chain 4 Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 4 Iteration: 500 / 2000 [ 25%] (Warmup)
Chain 4 Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 4 Iteration: 700 / 2000 [ 35%] (Warmup)
Chain 4 Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 4 Iteration: 900 / 2000 [ 45%] (Warmup)
Chain 4 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 4 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 4 Iteration: 1100 / 2000 [ 55%] (Sampling)
Chain 4 Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 4 Iteration: 1300 / 2000 [ 65%] (Sampling)
Chain 4 Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 4 Iteration: 1500 / 2000 [ 75%] (Sampling)
Chain 4 Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 1 finished in 0.2 seconds.
Chain 2 finished in 0.3 seconds.
Chain 3 Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 3 Iteration: 1900 / 2000 [ 95%] (Sampling)
Chain 3 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 3 finished in 0.3 seconds.
Chain 4 Iteration: 1700 / 2000 [ 85%] (Sampling)
Chain 4 Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 4 Iteration: 1900 / 2000 [ 95%] (Sampling)
Chain 4 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 4 finished in 0.2 seconds.
All 4 chains finished successfully.
Mean chain execution time: 0.2 seconds.
Total execution time: 0.6 seconds.
summary(m_extrap_overall)
Family: poisson
Links: mu = log
Formula: extrapulmonary_notifications ~ y_num + acf_period + acf_period:y_num + offset(log(total_population))
Data: mdata1 (Number of observations: 14)
Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
total post-warmup draws = 4000
Population-Level Effects:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept -7.90 0.05 -7.99 -7.81 1.00 2908 2532
y_num -0.09 0.01 -0.11 -0.07 1.00 2623 2183
acf_periodb.acf -0.00 0.24 -0.48 0.48 1.00 2558 2487
acf_periodc.postMacf -0.32 0.17 -0.64 0.01 1.00 2095 2415
y_num:acf_periodb.acf -0.02 0.03 -0.08 0.04 1.00 2413 2577
y_num:acf_periodc.postMacf 0.02 0.02 -0.02 0.05 1.00 1711 2108
Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
plot(m_extrap_overall)
pp_check(m_extrap_overall, type='ecdf_overlay')
plot_counterfactual(model_data = mdata1, model=m_extrap_overall, population_denominator = total_population, outcome=inc_ep_100k)
ggsave(here("figures/s6.png"), width=10)
Saving 10 x 4.51 in image
A. Percentage change in mortality, from 1956 to 1957 (i.e. immediate ACF effect)
summarise_change(model_data=mdata1, model = m_extrap_overall, population_denominator = total_population) %>%
map(datatable)
Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in dplyr 1.1.0.
Please use `reframe()` instead.
When switching from `summarise()` to `reframe()`, remember that `reframe()` always returns an ungrouped data frame and adjust accordingly.
$immediate_change
$post_change
$slope_change
NA
overall_ep_counterf <- calcuate_counterfactual(model_data = mdata1, model=m_extrap_overall, population_denominator = total_population)
Joining with `by = join_by(year, total_population, .draw)`Joining with `by = join_by(.draw)`
overall_ep_counterf$counter_post %>%
mutate(across(c(cases_averted:cases_averted.upper, diff_inc100k:diff_inc100k.upper), number_format(accuracy = 0.1, big.mark = ","))) %>%
mutate(across(c(rr_inc100k:rr_inc100k.upper), number_format(accuracy = 0.01))) %>%
mutate(across(c(pct_change:pct_change.upper), percent, accuracy=0.1)) %>%
datatable()
NA
Total extra pulmonary TB cases averted between 1958 and 1963
overall_ep_counterf$counter_post_overall %>%
mutate(across(c(cases_averted:cases_averted.upper), number_format(accuracy = 0.1, big.mark = ","))) %>%
mutate(across(c(pct_change:pct_change.upper), percent, accuracy=0.1)) %>%
datatable()
NA
NA
mdata2 <- ward_inc %>%
filter(tb_type=="Pulmonary") %>%
mutate(acf_period = case_when(year %in% c(1950:1956) ~ "a. pre-acf",
year %in% c(1957) ~ "b. acf",
year %in% c(1958:1963) ~ "c. post-acf")) %>%
group_by(ward) %>%
mutate(y_num = row_number()) %>%
ungroup()
(Note the denominator without institutionalised people and “shipping”!)
#weakly informative priors for multilevel model
basic_prior2 <- c(prior(normal(0, 1), class = Intercept),
prior(normal(0, 0.1), class = b),
prior(cauchy(0,5), class="sd"))
ggplot(data = tibble(x = seq(from = 0, to = 500, by = 10)),
aes(x = x, y = dgamma(x, shape = 1, rate = 0.01))) +
geom_area(color = "transparent",
fill = "#DE0D92") +
scale_x_continuous(NULL) +
scale_y_continuous(NULL, breaks = NULL) +
coord_cartesian(xlim = c(0, 500)) +
ggtitle(expression(brms~~gamma(1*", "*0.01)~shape~prior))
winform_prior2 <- c(prior(normal(0, 1), class = Intercept),
prior(gamma(1, 0.01), class = shape),
prior(normal(0, 1), class = b),
prior(cauchy(0,5), class="sd"),
prior(lkj(2), class="cor"))
m_pulmonary_ward_prior <- brm(
cases ~ y_num + acf_period + acf_period:y_num + (1 + y_num + acf_period + acf_period:y_num | ward) + offset(log(population_without_inst_ship)),
data = mdata2,
family = negbinomial(),
seed = 1234,
chains = 4, cores = 4,
prior = winform_prior2,
save_pars = save_pars(all = TRUE),
sample_prior = "only",
backend = "cmdstanr",
warmup = 1000)
Compiling Stan program...
-
\
|
/
-
\
|
/
-
\
|
/
-
\
|
/
-
\
|
/
In file included from /var/folders/bh/wr0g5x9j2wq46p9hm3ft_b380000gn/T/RtmpT9voWX/model-496f2c7e92bc.hpp:1:
In file included from /Users/petermacpherson/.cmdstan/cmdstan-2.32.2/stan/src/stan/model/model_header.hpp:4:
In file included from /Users/petermacpherson/.cmdstan/cmdstan-2.32.2/stan/lib/stan_math/stan/math.hpp:19:
In file included from /Users/petermacpherson/.cmdstan/cmdstan-2.32.2/stan/lib/stan_math/stan/math/rev.hpp:10:
In file included from /Users/petermacpherson/.cmdstan/cmdstan-2.32.2/stan/lib/stan_math/stan/math/rev/fun.hpp:198:
In file included from /Users/petermacpherson/.cmdstan/cmdstan-2.32.2/stan/lib/stan_math/stan/math/prim/functor.hpp:15:
In file included from /Users/petermacpherson/.cmdstan/cmdstan-2.32.2/stan/lib/stan_math/stan/math/prim/functor/integrate_ode_rk45.hpp:6:
In file included from /Users/petermacpherson/.cmdstan/cmdstan-2.32.2/stan/lib/stan_math/stan/math/prim/functor/ode_rk45.hpp:9:
In file included from /Users/petermacpherson/.cmdstan/cmdstan-2.32.2/stan/lib/stan_math/lib/boost_1.78.0/boost/numeric/odeint.hpp:76:
In file included from /Users/petermacpherson/.cmdstan/cmdstan-2.32.2/stan/lib/stan_math/lib/boost_1.78.0/boost/numeric/odeint/integrate/observer_collection.hpp:23:
In file included from /Users/petermacpherson/.cmdstan/cmdstan-2.32.2/stan/lib/stan_math/lib/boost_1.78.0/boost/function.hpp:30:
In file included from /Users/petermacpherson/.cmdstan/cmdstan-2.32.2/stan/lib/stan_math/lib/boost_1.78.0/boost/function/detail/prologue.hpp:17:
In file included from /Users/petermacpherson/.cmdstan/cmdstan-2.32.2/stan/lib/stan_math/lib/boost_1.78.0/boost/function/function_base.hpp:21:
In file included from /Users/petermacpherson/.cmdstan/cmdstan-2.32.2/stan/lib/stan_math/lib/boost_1.78.0/boost/type_index.hpp:29:
In file included from /Users/petermacpherson/.cmdstan/cmdstan-2.32.2/stan/lib/stan_math/lib/boost_1.78.0/boost/type_index/stl_type_index.hpp:47:
/Users/petermacpherson/.cmdstan/cmdstan-2.32.2/stan/lib/stan_math/lib/boost_1.78.0
-
/boost/container_hash/hash.hpp:132:33: warning: 'unary_function<const std::error_category *, unsigned long>' is deprecated [-Wdeprecated-declarations]
struct hash_base : std::unary_function<T, std::size_t> {};
^
/Users/petermacpherson/.cmdstan/cmdstan-2.32.2/stan/lib/stan_math/lib/boost_1.78.0/boost/container_hash/hash.hpp:692:18: note: in instantiation of template class 'boost::hash_detail::hash_base<const std::error_category *>' requested here
: public boost::hash_detail::hash_base<T*>
^
/Users/petermacpherson/.cmdstan/cmdstan-2.32.2/stan/lib/stan_math/lib/boost_1.78.0/boost/container_hash/hash.hpp:420:24: note: in instantiation of template class 'boost::hash<const std::error_category *>' requested here
boost::hash<T> hasher;
^
/Users/petermacpherson/.cmdstan/cmdstan-2.32.2/stan/lib/stan_math/lib/boost_1.78.0/boost/container_hash/hash.hpp:551:9: note: in instantiation of function template specialization 'boost::hash_combine<const std::error_category *>' requested here
hash_combine(seed, &v.category());
^
/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/include/c++/v1/__functional/unary_function.h:23:29: note: 'unary_function<const std::error_category *, unsigned long>' has been explicitly marked deprecated here
struct _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX11 unary_function
^
/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/include/c++/v1/__config:850:41: note: expanded from macro '_LIBCPP_DEPRECATED_IN_CXX11'
\
# define _LIBCPP_DEPRECATED_IN_CXX11 _LIBCPP_DEPRECATED
^
/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/include/c++/v1/__config:835:49: note: expanded from macro '_LIBCPP_DEPRECATED'
# define _LIBCPP_DEPRECATED __attribute__((deprecated))
^
|
/
-
\
|
/
-
\
|
/
-
\
|
/
-
\
|
/
-
\
|
/
-
\
|
/
-
\
|
/
-
\
|
/
-
\
|
/
-
\
|
/
-
\
|
/
-
\
|
/
-
\
|
/
-
\
|
/
-
\
|
/
-
\
|
/
-
\
|
/
-
\
|
/
-
\
|
/
-
\
|
/
-
\
|
/
-
\
|
/
-
\
|
/
-
\
|
/
-
\
|
/
-
\
|
/
-
\
|
/
-
\
|
/
-
\
1 warning generated.
|
/
ld: warning: duplicate -rpath '/Users/petermacpherson/.cmdstan/cmdstan-2.32.2/stan/lib/stan_math/lib/tbb' ignored
-
\
|
Start sampling
Running MCMC with 4 parallel chains...
Chain 1 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 1 Iteration: 100 / 2000 [ 5%] (Warmup)
Chain 1 Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 1 Iteration: 300 / 2000 [ 15%] (Warmup)
Chain 1 Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 1 Iteration: 500 / 2000 [ 25%] (Warmup)
Chain 1 Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 1 Iteration: 700 / 2000 [ 35%] (Warmup)
Chain 1 Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 1 Iteration: 900 / 2000 [ 45%] (Warmup)
Chain 1 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 1 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 1 Iteration: 1100 / 2000 [ 55%] (Sampling)
Chain 2 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 2 Iteration: 100 / 2000 [ 5%] (Warmup)
Chain 2 Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 2 Iteration: 300 / 2000 [ 15%] (Warmup)
Chain 2 Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 2 Iteration: 500 / 2000 [ 25%] (Warmup)
Chain 2 Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 2 Iteration: 700 / 2000 [ 35%] (Warmup)
Chain 2 Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 2 Iteration: 900 / 2000 [ 45%] (Warmup)
Chain 2 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 2 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 3 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 3 Iteration: 100 / 2000 [ 5%] (Warmup)
Chain 3 Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 3 Iteration: 300 / 2000 [ 15%] (Warmup)
Chain 3 Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 3 Iteration: 500 / 2000 [ 25%] (Warmup)
Chain 3 Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 3 Iteration: 700 / 2000 [ 35%] (Warmup)
Chain 3 Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 4 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 4 Iteration: 100 / 2000 [ 5%] (Warmup)
Chain 4 Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 4 Iteration: 300 / 2000 [ 15%] (Warmup)
Chain 4 Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 4 Iteration: 500 / 2000 [ 25%] (Warmup)
Chain 1 Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 1 Iteration: 1300 / 2000 [ 65%] (Sampling)
Chain 1 Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 2 Iteration: 1100 / 2000 [ 55%] (Sampling)
Chain 2 Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 2 Iteration: 1300 / 2000 [ 65%] (Sampling)
Chain 3 Iteration: 900 / 2000 [ 45%] (Warmup)
Chain 3 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 3 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 3 Iteration: 1100 / 2000 [ 55%] (Sampling)
Chain 4 Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 4 Iteration: 700 / 2000 [ 35%] (Warmup)
Chain 4 Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 4 Iteration: 900 / 2000 [ 45%] (Warmup)
Chain 4 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 4 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 1 Iteration: 1500 / 2000 [ 75%] (Sampling)
Chain 1 Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 2 Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 2 Iteration: 1500 / 2000 [ 75%] (Sampling)
Chain 3 Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 3 Iteration: 1300 / 2000 [ 65%] (Sampling)
Chain 4 Iteration: 1100 / 2000 [ 55%] (Sampling)
Chain 4 Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 1 Iteration: 1700 / 2000 [ 85%] (Sampling)
Chain 1 Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 2 Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 2 Iteration: 1700 / 2000 [ 85%] (Sampling)
Chain 3 Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 3 Iteration: 1500 / 2000 [ 75%] (Sampling)
Chain 4 Iteration: 1300 / 2000 [ 65%] (Sampling)
Chain 4 Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 1 Iteration: 1900 / 2000 [ 95%] (Sampling)
Chain 1 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 2 Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 2 Iteration: 1900 / 2000 [ 95%] (Sampling)
Chain 3 Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 3 Iteration: 1700 / 2000 [ 85%] (Sampling)
Chain 4 Iteration: 1500 / 2000 [ 75%] (Sampling)
Chain 4 Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 1 finished in 0.8 seconds.
Chain 2 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 3 Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 3 Iteration: 1900 / 2000 [ 95%] (Sampling)
Chain 4 Iteration: 1700 / 2000 [ 85%] (Sampling)
Chain 4 Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 2 finished in 0.8 seconds.
Chain 3 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 4 Iteration: 1900 / 2000 [ 95%] (Sampling)
Chain 4 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 3 finished in 0.8 seconds.
Chain 4 finished in 0.8 seconds.
All 4 chains finished successfully.
Mean chain execution time: 0.8 seconds.
Total execution time: 1.1 seconds.
conditional_effects(m_pulmonary_ward_prior)
NA
NA
Now fit the model with data
summary(m_pulmonary_ward)
Family: negbinomial
Links: mu = log; shape = identity
Formula: cases ~ y_num + acf_period + acf_period:y_num + (1 + y_num + acf_period + acf_period:y_num | ward) + offset(log(population_without_inst_ship))
Data: mdata2 (Number of observations: 518)
Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
total post-warmup draws = 4000
Group-Level Effects:
~ward (Number of levels: 37)
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sd(Intercept) 0.26 0.04 0.20 0.34 1.00 1231 2338
sd(y_num) 0.02 0.01 0.01 0.03 1.00 920 1020
sd(acf_periodb.acf) 0.08 0.05 0.00 0.18 1.00 1334 2009
sd(acf_periodc.postMacf) 0.13 0.08 0.01 0.31 1.00 917 1291
sd(y_num:acf_periodb.acf) 0.01 0.01 0.00 0.02 1.00 1271 1952
sd(y_num:acf_periodc.postMacf) 0.01 0.01 0.00 0.03 1.01 476 992
cor(Intercept,y_num) -0.50 0.20 -0.80 -0.04 1.00 2144 2019
cor(Intercept,acf_periodb.acf) -0.28 0.33 -0.79 0.44 1.00 2848 3038
cor(y_num,acf_periodb.acf) -0.07 0.32 -0.65 0.56 1.00 4199 3014
cor(Intercept,acf_periodc.postMacf) -0.18 0.27 -0.67 0.38 1.00 3808 2947
cor(y_num,acf_periodc.postMacf) 0.14 0.30 -0.44 0.70 1.00 2986 2796
cor(acf_periodb.acf,acf_periodc.postMacf) 0.09 0.33 -0.58 0.68 1.00 1894 2813
cor(Intercept,y_num:acf_periodb.acf) -0.28 0.32 -0.80 0.42 1.00 2714 2945
cor(y_num,y_num:acf_periodb.acf) -0.06 0.32 -0.64 0.58 1.00 4764 2747
cor(acf_periodb.acf,y_num:acf_periodb.acf) -0.09 0.34 -0.71 0.59 1.00 3966 3377
cor(acf_periodc.postMacf,y_num:acf_periodb.acf) 0.09 0.33 -0.58 0.67 1.00 2758 3268
cor(Intercept,y_num:acf_periodc.postMacf) 0.02 0.31 -0.57 0.60 1.00 3797 2885
cor(y_num,y_num:acf_periodc.postMacf) -0.10 0.33 -0.69 0.58 1.00 1832 2559
cor(acf_periodb.acf,y_num:acf_periodc.postMacf) 0.07 0.33 -0.58 0.65 1.00 2236 2984
cor(acf_periodc.postMacf,y_num:acf_periodc.postMacf) -0.14 0.36 -0.80 0.54 1.00 1377 1698
cor(y_num:acf_periodb.acf,y_num:acf_periodc.postMacf) 0.06 0.33 -0.60 0.67 1.00 1893 3117
Population-Level Effects:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept -6.14 0.05 -6.24 -6.04 1.00 779 1461
y_num -0.02 0.01 -0.03 -0.01 1.00 2282 2485
acf_periodb.acf -0.02 0.99 -1.99 1.90 1.00 4274 3362
acf_periodc.postMacf 0.04 0.11 -0.16 0.25 1.00 3841 3092
y_num:acf_periodb.acf 0.09 0.12 -0.15 0.33 1.00 4266 3248
y_num:acf_periodc.postMacf -0.05 0.01 -0.07 -0.03 1.00 3747 2929
Family Specific Parameters:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
shape 99.94 23.96 65.64 156.48 1.00 2447 2855
Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
plot_counterfactual(model_data = mdata2, model=m_pulmonary_ward, outcome = inc_100k, population_denominator = population_without_inst_ship, grouping_var = ward, ward)
ggsave(here("figures/s4.png"), width=10, height=12)
A. percentage increase in CNR, from 1956 to 1957 (i.e. immediate ACF effect)
ward_change <- summarise_change(model_data = mdata2, model = m_pulmonary_ward, population_denominator = population_without_inst_ship, grouping_var=ward)
Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in dplyr 1.1.0.
Please use `reframe()` instead.
When switching from `summarise()` to `reframe()`, remember that `reframe()` always returns an ungrouped data frame and adjust accordingly.
ward_change %>%
map(datatable)
$immediate_change
$post_change
$slope_change
NA
As a supplementary figure
ward_change$immediate_change %>%
arrange(acf_inc100k_rr) %>%
ggplot() +
geom_pointrange(aes(y=acf_inc100k_rr, ymin=acf_inc100k_rr.lower, ymax=acf_inc100k_rr.upper,
x=fct_reorder(ward, acf_inc100k_rr),
colour = acf_inc100k_rr)) +
geom_hline(aes(yintercept=1), linetype=2) +
coord_flip() +
scale_colour_viridis_b(option = "D") +
scale_y_continuous(limits = c(0.8,3.0)) +
labs(x="",
y="Relative posterior predicted case notification rate (per 100,000; 95% UI)\nACF (1957) vs. Before ACF (1956)") +
theme_ggdist() +
theme(legend.position = "none",
panel.border = element_rect(colour = "grey78", fill=NA))
ggsave(here("figures/s5.png"))
Saving 7.29 x 4.51 in image
ward_change$post_change %>%
arrange(acf_inc100k_rr) %>%
ggplot() +
geom_pointrange(aes(y=acf_inc100k_rr, ymin=acf_inc100k_rr.lower, ymax=acf_inc100k_rr.upper,
x=fct_reorder(ward, acf_inc100k_rr),
colour = acf_inc100k_rr)) +
geom_hline(aes(yintercept=1), linetype=2) +
coord_flip() +
scale_colour_viridis_b(option = "D") +
#scale_y_continuous(limits = c(0.8,3.0)) +
labs(x="",
y="Relative posterior predicted case notification rate (per 100,000; 95% UI)\nAfter ACF (1958) vs. Before ACF (1956)") +
theme_ggdist() +
theme(legend.position = "none",
panel.border = element_rect(colour = "grey78", fill=NA))
ggsave(here("figures/s6.png"))
Saving 7.29 x 4.51 in image
percentage change = (final value - initial value) / initial value
ward_change$slope_change %>%
ggplot() +
geom_pointrange(aes(y=pct_change_epred_overall , ymin=pct_change_lower_overall , ymax=pct_change_upper_overall ,
group=acf_period, colour=acf_period,
x = fct_reorder(ward, pct_change_epred_overall ))) +
coord_flip() +
scale_y_continuous(labels =percent) +
scale_colour_manual(values = c("#DE0D92", "#4D6CFA")) +
#scale_y_continuous(limits = c(0.8,3.0)) +
labs(title="Intervention effect of mass miniature chest X-ray in Glasgow",
subtitle="By municipal ward",
x="",
y="Mean annual rate of change in case notification rate (95% CrI)\n Before ACF (1950-1956) vs. after ACF (1958-1963)") +
theme_ggdist() +
theme(panel.border = element_rect(colour = "grey78", fill=NA))
Suggestion from Pete D 2024-02-02: Try plotting these on choropleth maps
bind_rows(
(glasgow_wards_1951 %>%
left_join(ward_change$immediate_change) %>%
mutate(estimate = "Immediate effect")),
(glasgow_wards_1951 %>%
left_join(ward_change$post_change) %>%
mutate(estimate = "Post ACF effect")),
(glasgow_wards_1951 %>%
left_join(ward_change$post_change) %>%
mutate(estimate = "Slope effect"))
) %>%
select(ward, acf_inc100k_rr, estimate) %>%
mutate(ward = case_when(ward=="Shettleston and Tollcross" ~ "Shettleston and\nTollcross",
ward=="Partick (West)" ~ "Partick\n(West)",
ward=="Partick (East)" ~ "Partick\n(East)",
ward=="North Kelvin" ~ "North\nKelvin",
ward=="Kinning Park" ~ "Kinning\nPark",
TRUE ~ ward)) %>%
split(.$estimate) %>%
map(~ ggplot(.) +
geom_sf(aes(fill=acf_inc100k_rr)) +
geom_sf_label(aes(label = ward), size=1.5, fill=NA, label.size = NA, colour="black", family = "Segoe UI") +
scale_fill_viridis_c() +
theme_ggdist() +
theme(panel.border = element_rect(colour = "grey78", fill=NA)) +
labs(y="", x="",
fill="RR")) %>%
cowplot::plot_grid(plotlist = ., ncol = 3,
labels = c('A: Immediate effect (1957 vs. 1956)',
'B: Post-ACF effect (1958 vs. 1956)',
'C: Slope change effect (1958-63 vs. 1950-56'))
Joining with `by = join_by(ward)`Joining with `by = join_by(ward)`Joining with `by = join_by(ward)`Warning: st_point_on_surface may not give correct results for longitude/latitude dataWarning: st_point_on_surface may not give correct results for longitude/latitude dataWarning: st_point_on_surface may not give correct results for longitude/latitude data
ggsave(here("figures/ward_effects.png"), width = 16)
Saving 16 x 4.5 in image
NA
NA
(Alternative figure - keep in for the minute)
Is there any correlation between immediate increase and a) post-intervention (1958) effect, and b) post intervention slope (1958-1963)
Try a different way with the full distribution of posteriors
left_join(ward_immediate_draws_expanded, ward_post_draws) %>%
ggplot(aes(y=rr_1958_vs_1956, x=rr_1957_vs_mean_1950_1956)) +
geom_hdr_points(size=0.1) +
geom_smooth(method = "lm", se=FALSE, colour="black", linewidth=0.5) +
facet_wrap(ward~.)
Joining with `by = join_by(.draw, ward)`
#correlation between slope rate ratio (post vs pre) and magnitude of ACF effect
#a) with magnitude compared to 1956 only
left_join(ward_immediate_draws, ward_slope_draws_rr) %>%
ggplot(aes(y=rr_slope, x=rr_1957_vs_1956)) +
geom_hdr_points(size=0.1) +
geom_smooth(method = "lm", se=FALSE, colour="black", linewidth=0.5) +
facet_wrap(ward~.)
Joining with `by = join_by(.draw, ward)`
Correlation between immediate effect and post effect of ACF
#
# left_join(ward_immediate_draws, ward_post_draws) %>%
# ggplot(aes(x=pct_change_immediate, y=pct_change_post, group=ward)) +
# geom_hdr_points(size=0.1) +
# geom_smooth(method = "lm", se=FALSE, colour="black", linewidth=0.5) +
# stat_regline_equation(label.x = 0, label.y = 0.25, size=4) +
# scale_colour_scico_d(palette = "lipari", name = "Posterior probability") +
# scale_x_continuous(labels = percent) +
# scale_y_continuous(labels = percent) +
# labs(title="Correlation between immediate ACF impact and post-ACF case notification rate",
# y="Post intervention impact: ercentage change in CNR (1958 vs. 1956)",
# x="Immediate impact: percentage change in CNR (1957 vs. 1956)",
# caption="Points are draws from posteior distribution") +
# theme_ggdist() +
# theme(legend.position = "bottom",
# panel.border = element_rect(colour = "grey78", fill=NA)) +
# facet_wrap(ward~.)
Correlation between immediate effect and change in slope
# left_join(ward_immediate_draws, ward_slope_draws) %>%
# ggplot(aes(x=pct_change_immediate, y=ratio_annual_slope, group=ward)) +
# geom_hdr_points(size=0.1) +
# geom_smooth(method = "lm", se=FALSE, colour="black", linewidth=0.5) +
# #stat_regline_equation(label.x = 0, label.y = 0.02, size=4) +
# scale_colour_scico_d(palette = "lipari", name = "Posterior probability") +
# scale_x_continuous(labels = percent) +
# scale_y_continuous(limits = c(0, 10)) +
# labs(title="Correlation between immediate ACF impact and post-ACF case notification rate",
# y="Post intervention impact: Percentage change in CNR (1958 vs. 1956)",
# x="Immediate impact: percentage change in CNR (1957 vs. 1956)",
# caption="Points are draws from posteior distribution") +
# theme_ggdist() +
# theme(legend.position = "bottom",
# panel.border = element_rect(colour = "grey78", fill=NA)) +
# facet_wrap(ward~.)
Join these together with the overall estimates to make a single figure for showing impact
# f2_data <-
# left_join(overall_immediate_draws, overall_post_draws) %>%
# left_join(overall_slope_draws) %>%
# mutate(level = "overall",
# ward = "Glasgow") %>%
# bind_rows(
# left_join(ward_immediate_draws, ward_post_draws) %>%
# left_join(ward_slope_draws) %>%
# mutate(level = "ward")
# )
#
# f2a <- f2_data %>%
# ggplot(aes(x=pct_change_immediate, y=pct_change_post, group=ward)) +
# geom_hdr_points(size=0.1) +
# geom_smooth(method = "lm", se=FALSE, colour="black", linewidth=0.5) +
# stat_regline_equation(label.x = 0, label.y = 0.12, size=4) +
# scale_colour_scico_d(palette = "lipari", name = "Posterior probability density") +
# scale_x_continuous(labels = percent) +
# scale_y_continuous(labels = percent) +
# labs(y="Post-intervention impact: Percentage change in CNR (1958 vs. 1956)",
# x="Immediate impact: percentage change in CNR (1957 vs. 1956)") +
# theme_ggdist() +
# theme(legend.position = "bottom",
# panel.border = element_rect(colour = "grey78", fill=NA)) +
# facet_wrap(fct_relevel(ward,
# "Glasgow",
# after=0)~., ncol = 5) +
# guides(colour = guide_legend(override.aes = list(size=4)))
#
# f2a
#
# f2b <- f2_data %>%
# ggplot(aes(x=pct_change_immediate, y=ratio_annual_slope, group=ward)) +
# geom_hdr_points(size=0.1) +
# geom_smooth(method = "lm", se=FALSE, colour="black", linewidth=0.5) +
# stat_regline_equation(label.x = 1.2, label.y = 12, size=4) +
# scale_x_continuous(labels = percent,
# breaks = pretty_breaks(n = 4)) +
# scale_y_continuous(breaks = pretty_breaks(n = 4),
# limits = c(0,15)) +
# scale_fill_viridis_d(option="E") +
# labs(y="Post-intervention impact: Relative change in annual CNR slope (1958-1963 vs. 1950-1956)",
# x="Immediate impact: percentage change in CNR (1957 vs. 1956)",
# colour="Posterior probability density") +
# theme_ggdist() +
# theme(legend.position = "bottom",
# panel.border = element_rect(colour = "grey78", fill=NA)) +
# facet_wrap(fct_relevel(ward,
# "Glasgow",
# after=0)~., ncol = 5) +
# guides(colour = guide_legend(override.aes = list(size=4)))
#
# f2b
#
# (f2a / f2b) + plot_annotation(tag_levels = 'A')
ggsave(here("figures/f2.png"), height=18, width=10)
ward_counterf <- calcuate_counterfactual(model_data = mdata2, model=m_pulmonary_ward, population_denominator = population_without_inst_ship, grouping_var=ward)
ward_counterf %>%
map(datatable)
Total pulmonary TB cases averted between 1958 and 1963
ward_averted <- ward_counterf$counter_post %>%
summarise(across(c(cases_averted, cases_averted.lower, cases_averted.upper), sum), .by=ward) %>%
mutate_if(is.double, ~ scales::number(x = ., accuracy = 0.1, big.mark = ",")) %>%
mutate(cases_averted_txt = glue::glue("{cases_averted}\n({cases_averted.lower}-{cases_averted.upper})")) %>%
select(ward, cases_averted_txt)
ward_averted %>% datatable()
Add the numbers averted for each ward to the figure
plot_counterfactual(model_data = mdata2, model=m_pulmonary_ward, outcome = inc_100k, population_denominator = population_without_inst_ship, grouping_var = ward, ward) +
geom_text(data=ward_averted, aes(x=1961, y=500, label=cases_averted_txt), size=3)
ggsave(here("figures/s4.png"), width=14, height=12)
Fit the model
(Not rewritten the functions for this yet)
mdata3 <- cases_by_age_sex %>%
filter(tb_type=="Pulmonary") %>%
mutate(acf_period = case_when(year %in% c(1950:1956) ~ "a. pre-acf",
year %in% c(1957) ~ "b. acf",
year %in% c(1958:1963) ~ "c. post-acf")) %>%
mutate(year2 = year+0.5) %>%
group_by(age, sex) %>%
mutate(y_num = row_number()) %>%
ungroup()
winform_prior3 <- c(prior(normal(0, 1), class = Intercept),
#prior(gamma(0.5, 0.01), class = shape),
prior(normal(0, 1), class = b),
prior(cauchy(0,5), class="sd"),
prior(lkj(2), class="cor"))
m_age_sex <- brm(
cases ~ y_num + (acf_period)*(age*sex) + (acf_period:y_num)*(age*sex),
data = mdata3,
family = negbinomial(),
seed = 1234,
chains = 4, cores = 4,
prior = basic_prior,
backend = "cmdstanr")
summary(m_age_sex)
plot(m_age_sex)
pp_check(m_age_sex, type='ecdf_overlay')
Summarise posterior
#posterior draws, and summarise
age_sex_summary <- mdata3 %>%
select(year, year2, y_num, acf_period, age, sex) %>%
add_epred_draws(m_age_sex) %>%
group_by(year2, acf_period, age, sex) %>%
mean_qi() %>%
mutate(acf_period = case_when(acf_period=="a. pre-acf" ~ "Before Intervention",
acf_period=="c. post-acf" ~ "Post Intervention"))
#create the counterfactual (no intervention), and summarise
age_sex_counterfact <-
tibble(year = mdata3$year,
year2 = mdata3$year2,
y_num = mdata3$y_num,
age = mdata3$age,
sex = mdata3$sex,
acf_period = factor("a. pre-acf")) %>%
add_epred_draws(m_age_sex) %>%
group_by(year2, acf_period, age, sex) %>%
mean_qi() %>%
mutate(acf_period = case_when(acf_period=="a. pre-acf" ~ "Before Intervention",
acf_period=="c. post-acf" ~ "Post Intervention")) %>%
ungroup() %>%
mutate(age = case_when(age=="00_05" ~ "0 to 5y",
age=="06_15" ~ "06 to 15y",
age=="16_25" ~ "16 to 25y",
age=="26_35" ~ "26 to 35y",
age=="36_45" ~ "36 to 45y",
age=="46_55" ~ "46 to 55y",
age=="56_65" ~ "56 to 65y",
age=="65+" ~ "65 & up y")) %>%
mutate(sex = case_when(sex== "M" ~ "Male",
sex== "F" ~ "Female"))
age_sex_summary %>%
ungroup() %>%
mutate(age = case_when(age=="00_05" ~ "0 to 5y",
age=="06_15" ~ "06 to 15y",
age=="16_25" ~ "16 to 25y",
age=="26_35" ~ "26 to 35y",
age=="36_45" ~ "36 to 45y",
age=="46_55" ~ "46 to 55y",
age=="56_65" ~ "56 to 65y",
age=="65+" ~ "65 & up y")) %>%
mutate(sex = case_when(sex== "M" ~ "Male",
sex== "F" ~ "Female")) %>%
ggplot() +
geom_ribbon(aes(ymin=.epred.lower, ymax=.epred.upper, x=year2, group = acf_period, fill=acf_period), alpha=0.5) +
geom_ribbon(data = age_sex_counterfact %>% filter(year>=1956),
aes(ymin=.epred.lower, ymax=.epred.upper, x=year2, fill="Counterfactual"), alpha=0.5) +
geom_line(data = age_sex_counterfact %>% filter(year>=1956),
aes(y=.epred, x=year2, colour="Counterfactual")) +
geom_line(aes(y=.epred, x=year2, group=acf_period, colour=acf_period)) +
geom_point(data = mdata3 %>%
ungroup() %>%
mutate(age = case_when(age=="00_05" ~ "0 to 5y",
age=="06_15" ~ "06 to 15y",
age=="16_25" ~ "16 to 25y",
age=="26_35" ~ "26 to 35y",
age=="36_45" ~ "36 to 45y",
age=="46_55" ~ "46 to 55y",
age=="56_65" ~ "56 to 65y",
age=="65+" ~ "65 & up y")) %>%
mutate(sex = case_when(sex== "M" ~ "Male",
sex== "F" ~ "Female")) , aes(y=cases, x=year2, shape=acf_period), size=2) +
geom_vline(aes(xintercept=acf_end), linetype=3) +
ggh4x::facet_grid2(age~sex, scales = "free_y", independent = "y") +
theme_ggdist() +
scale_y_continuous(labels=comma) +
scale_x_continuous(labels = year_labels,
breaks = year_labels,
guide = guide_axis(angle = 90)) +
scale_fill_manual(values = c("#DE0D92", "grey50", "#4D6CFA") , name="") +
scale_colour_manual(values = c("#DE0D92", "grey50", "#4D6CFA") , name="") +
scale_shape_discrete(name="") +
labs(
x = "Year",
y = "Case notifications (n)",
caption = "Mass miniature X-ray campaign period between dashed lines (11th March-12th April 1957)"
) +
theme(legend.position = "bottom",
panel.border = element_rect(colour = "grey78", fill=NA),
title = element_text(size=14),
axis.text = element_text(size=14),
legend.text = element_text(size=12)) +
guides(shape="none")
ggsave(here("figures/s7.png"), height=10)
nd <- mdata3 %>%
filter(year %in% c(1956:1957)) %>%
select(acf_period, y_num, age, sex)
age_sex_impact_out <-
add_epred_draws(m_age_sex,
newdata=nd) %>%
ungroup() %>%
select(acf_period, .epred, age, sex) %>%
pivot_wider(names_from = acf_period,
values_from = .epred,
values_fn = list) %>%
unnest() %>%
rename(pre_epred = 3,
post_epred = 4) %>%
mutate(acf_diff = post_epred-pre_epred,
acf_rr = post_epred/pre_epred) %>%
group_by(age, sex) %>%
mean_qi(acf_diff, acf_rr)
age_sex_impact_out %>%
mutate_if(is.double, ~ scales::number(x = ., accuracy = 0.01, big.mark = ",")) %>%
datatable()
f3a <- age_sex_impact_out %>%
mutate(sex = case_when(sex=="M" ~ "Male",
sex=="F" ~ "Female")) %>%
mutate(age = case_when(age=="00_05" ~ "0 to 5y",
age=="06_15" ~ "06 to 15y",
age=="16_25" ~ "16 to 25y",
age=="26_35" ~ "26 to 35y",
age=="36_45" ~ "36 to 45y",
age=="46_55" ~ "46 to 55y",
age=="56_65" ~ "56 to 65y",
age=="65+" ~ "65 & up y")) %>%
ggplot() +
geom_pointrange(aes(y=acf_rr, ymin=acf_rr.lower, ymax=acf_rr.upper, group=sex,
x=age,
colour = sex),
position = position_dodge(width = 0.25)) +
geom_hline(aes(yintercept=1), linetype=2) +
scale_colour_manual(values = c("purple", "darkorange"), name="") +
labs(x="",
y="Relative notifications (95% UI)\nACF (1957) vs. Before ACF (1956)") +
theme_ggdist() +
theme(legend.position = "bottom",
panel.border = element_rect(colour = "grey78", fill=NA))
nd <- mdata3 %>%
filter(year %in% c(1956,1958)) %>%
select(acf_period, y_num, age, sex)
#Do it with calculating incidence, then sumamrising.
age_sex_impact2 <-add_epred_draws(m_age_sex,
newdata=nd) %>%
ungroup() %>%
select(acf_period, .epred, age, sex) %>%
pivot_wider(names_from = acf_period,
values_from = .epred,
values_fn = list) %>%
unnest() %>%
rename(pre_epred = 3,
post_epred = 4) %>%
mutate(acf_diff = post_epred-pre_epred,
acf_rr = post_epred/pre_epred) %>%
group_by(age, sex) %>%
mean_qi(acf_diff, acf_rr)
age_sex_impact2 %>%
mutate_if(is.double, ~ scales::number(x = ., accuracy = 0.01, big.mark = ",")) %>%
datatable()
f3b <- age_sex_impact2 %>%
mutate(sex = case_when(sex=="M" ~ "Male",
sex=="F" ~ "Female")) %>%
mutate(age = case_when(age=="00_05" ~ "0 to 5y",
age=="06_15" ~ "06 to 15y",
age=="16_25" ~ "16 to 25y",
age=="26_35" ~ "26 to 35y",
age=="36_45" ~ "36 to 45y",
age=="46_55" ~ "46 to 55y",
age=="56_65" ~ "56 to 65y",
age=="65+" ~ "65 & up y")) %>%
ggplot() +
geom_pointrange(aes(y=acf_rr, ymin=acf_rr.lower, ymax=acf_rr.upper, group=sex,
x=age,
colour = sex),
position = position_dodge(width = 0.25)) +
geom_hline(aes(yintercept=1), linetype=2) +
scale_colour_manual(values = c("purple", "darkorange"), name="") +
labs(x="",
y="Relative notifications (95% UI)\nACF (1958) vs. Before ACF (1956)") +
theme_ggdist() +
theme(legend.position = "bottom",
panel.border = element_rect(colour = "grey78", fill=NA))
age_sex_impact3 <- mdata3 %>%
select(year, year2, y_num, acf_period, cases, age, sex) %>%
filter(year!=1957) %>%
add_epred_draws(m_age_sex) %>%
group_by(year, age, sex, acf_period) %>%
mean_qi(.epred) %>%
ungroup() %>%
mutate(n_years = length(year), .by=acf_period) %>%
summarise(pct_change_epred_overall = (((last(.epred) - first(.epred))/first(.epred))),
pct_change_lower_overall = (((last(.lower) - first(.lower))/first(.lower))),
pct_change_upper_overall = (((last(.upper) - first(.upper))/first(.upper))),
pct_change_epred_annual = (((last(.epred) - first(.epred))/first(.epred))/n_years),
pct_change_lower_annual = (((last(.lower) - first(.lower))/first(.lower))/n_years),
pct_change_upper_annual = (((last(.upper) - first(.upper))/first(.upper))/n_years),
.by = c(acf_period, age, sex)) %>%
distinct()
age_sex_impact3 %>%
mutate_if(is.double, percent) %>%
datatable()
f3c <- age_sex_impact3 %>%
mutate(sex = case_when(sex=="M" ~ "Male",
sex=="F" ~ "Female")) %>%
mutate(age = case_when(age=="00_05" ~ "0 to 5y",
age=="06_15" ~ "06 to 15y",
age=="16_25" ~ "16 to 25y",
age=="26_35" ~ "26 to 35y",
age=="36_45" ~ "36 to 45y",
age=="46_55" ~ "46 to 55y",
age=="56_65" ~ "56 to 65y",
age=="65+" ~ "65 & up y")) %>%
ggplot() +
geom_hline(aes(yintercept=0), linetype=2) +
geom_pointrange(aes(y=pct_change_epred_annual, ymin=pct_change_lower_annual, ymax=pct_change_upper_annual, group=acf_period,
x=age,
colour = acf_period), size=0.1) +
scale_y_continuous(labels =percent) +
facet_grid(.~sex) +
coord_flip() +
scale_colour_manual(values = c("#DE0D92", "#4D6CFA")) +
labs(x="",
y="Mean annual rate of change in case notification rate (95% UI)\n Before ACF (1950-1956) vs. after ACF (1958-1963)",
colour="") +
theme_ggdist() +
theme(panel.border = element_rect(colour = "grey78", fill=NA))
f3c
counterfact_age_sex <-
add_epred_draws(object = m_age_sex,
newdata = mdata3 %>%
select(year, year2, y_num, age, sex) %>%
mutate(acf_period = "a. pre-acf")) %>%
filter(year>1957) %>%
select(year, age, sex, .draw, .epred_counterf = .epred)
#Calcuate incidence per draw, then summarise.
post_change_age_sex <-
add_epred_draws(object = m_age_sex,
newdata = mdata3 %>%
select(year, year2, y_num, age, sex, acf_period)) %>%
filter(year>1957) %>%
ungroup() %>%
select(year, age, sex, .draw, .epred)
#for the overall period
counterfact_overall_age_sex <-
add_epred_draws(object = m_age_sex,
newdata = mdata3 %>%
select(year, year2, y_num, age, sex) %>%
mutate(acf_period = "a. pre-acf")) %>%
filter(year>1957) %>%
select(age, sex, .draw, .epred) %>%
group_by(age, sex, .draw) %>%
summarise(.epred_counterf = sum(.epred)) %>%
mutate(year = "Overall (1958-1963)")
#Calcuate incidence per draw, then summarise.
post_change_overall_age_sex <-
add_epred_draws(object = m_age_sex,
newdata = mdata3 %>%
select(year, year2, y_num, age, sex, acf_period)) %>%
filter(year>1957) %>%
select(age, sex, .draw, .epred) %>%
group_by(.draw, age, sex) %>%
summarise(.epred = sum(.epred))
left_join(counterfact_age_sex, post_change_age_sex) %>%
mutate(cases_averted = .epred_counterf-.epred,
pct_change = (.epred - .epred_counterf)/.epred_counterf) %>%
group_by(year, age, sex) %>%
mean_qi(cases_averted, pct_change) %>%
ungroup() %>%
datatable()
counter_post_overall_age_sex <-
left_join(counterfact_overall_age_sex, post_change_overall_age_sex) %>%
mutate(cases_averted = .epred_counterf-.epred,
pct_change = (.epred - .epred_counterf)/.epred_counterf) %>%
group_by(age, sex) %>%
mean_qi(cases_averted, pct_change) %>%
ungroup() %>%
mutate(year = "Overall (1958-1963)")
age_sex_txt <- counter_post_overall_age_sex %>%
mutate(across(c(cases_averted:cases_averted.upper), number_format(accuracy = 0.1, big.mark = ","))) %>%
mutate(across(c(pct_change:pct_change.upper), percent, accuracy=0.1)) %>%
transmute(year = as.character(year),
sex = sex,
age = age,
cases_averted = glue::glue("{cases_averted}\n({cases_averted.lower} to {cases_averted.upper})"),
pct_change = glue::glue("{pct_change}\n({pct_change.lower} to {pct_change.upper})"))
age_sex_txt %>% datatable()
f3d <- counter_post_overall_age_sex %>%
mutate(sex = case_when(sex=="M" ~ "Male",
sex=="F" ~ "Female")) %>%
mutate(age = case_when(age=="00_05" ~ "0 to 5y",
age=="06_15" ~ "06 to 15y",
age=="16_25" ~ "16 to 25y",
age=="26_35" ~ "26 to 35y",
age=="36_45" ~ "36 to 45y",
age=="46_55" ~ "46 to 55y",
age=="56_65" ~ "56 to 65y",
age=="65+" ~ "65 & up y")) %>%
ggplot() +
geom_pointrange(aes(x = age, y=cases_averted, ymin=cases_averted.lower, ymax=cases_averted.upper, colour=sex)) +
facet_grid(.~sex) +
coord_flip() +
scale_colour_manual(values = c("purple", "darkorange"), name="") +
scale_y_continuous(labels = comma) +
labs(x="",
y="Number (95% UI) of TB cases averted (1958-1963)",
colour="") +
theme_ggdist() +
theme(panel.border = element_rect(colour = "grey78", fill=NA),
legend.position = "none")
f3d
Join together for Figure 2.
(f3a + f3b) / (f3c + f3d) + plot_annotation(tag_levels = "A")
ggsave(here("figures/f3.png"), width = 12)
(Very much a work in progress!)
mdata4 <- division_inc %>%
filter(tb_type=="Pulmonary") %>%
mutate(acf_period = case_when(year %in% c(1950:1956) ~ "a. pre-acf",
year %in% c(1957) ~ "b. acf",
year %in% c(1958:1963) ~ "c. post-acf")) %>%
group_by(division) %>%
mutate(y_num = row_number()) %>%
ungroup()
ggplot(data = tibble(x = seq(from = 0, to = 1500, by = 10)),
aes(x = x, y = dgamma(x, shape = 2, rate = 0.001))) +
geom_area(color = "transparent",
fill = "#DE0D92") +
scale_x_continuous(NULL) +
scale_y_continuous(NULL, breaks = NULL) +
coord_cartesian(xlim = c(0, 1500)) +
ggtitle(expression(brms~~gamma(0.5*", "*0.0001)~shape~prior))
winform_prior3 <- c(prior(normal(0, 0.1), class = Intercept),
prior(gamma(2, 0.0001), class = shape),
prior(normal(0, 0.0001), class = b, coef = "acf_periodb.acf"),
prior(normal(0, 0.0001), class = b, coef = "acf_periodc.postMacf"),
prior(normal(0, 0.0001), class = b, coef = "y_num"),
prior(normal(0, 0.0001), class = b, coef = "y_num:acf_periodb.acf"),
prior(normal(0, 0.0001), class = b, coef = "y_num:acf_periodc.postMacf"),
prior(cauchy(0,5), class="sd"))
m_pulmonary_division_prior <- brm(
cases ~ y_num + acf_period + acf_period:y_num + (1 + y_num + acf_period + acf_period:y_num | division ) + offset(log(population_without_inst_ship)),
data = mdata4,
family = negbinomial(),
seed = 1234,
chains = 4, cores = 4,
prior = winform_prior3,
save_pars = save_pars(all = TRUE),
sample_prior = "only",
backend = "cmdstanr",
warmup = 1000,
control = list(adapt_delta = 0.9))
conditional_effects(m_pulmonary_division_prior)
plot_counterfactual(model=m_pulmonary_division_prior, model_data=mdata4, population_denominator = population_without_inst_ship, outcome = inc_100k, grouping_var = division, division)
m_pulmonary_division <- brm(
cases ~ y_num + acf_period + acf_period:y_num + (1 + y_num + acf_period + acf_period:y_num | division) + offset(log(population_without_inst_ship)),
data = mdata4,
family = negbinomial(),
seed = 1234,
chains = 4, cores = 4,
prior = winform_prior3,
save_pars = save_pars(all = TRUE),
backend = "cmdstanr",
warmup = 1000,
control = list(adapt_delta = 0.9))
summary(m_pulmonary_division)
plot(m_pulmonary_division)
pp_check(m_pulmonary_division, type='ecdf_overlay')
plot_counterfactual(model=m_pulmonary_division, model_data=mdata4, population_denominator = population_without_inst_ship, outcome = inc_100k, grouping_var = division, division)
summarise_change(model_data=mdata4, model = m_pulmonary_division, population_denominator = population_without_inst_ship, grouping_var = division) %>%
map(datatable)
Make a table of counterfactual effects for the manuscript
pulmonary_counterfactuals <- tidy_counterfactuals(overall_pulmonary_counterf$counter_post)
pulmonary_counterfactuals_overall <- tidy_counterfactuals_overall(overall_pulmonary_counterf$counter_post_overall)
extrapulmonary_counterfactuals <- tidy_counterfactuals(overall_ep_counterf$counter_post)
extrapulmonary_counterfactuals_overall <- tidy_counterfactuals_overall(overall_ep_counterf$counter_post_overall)
age_sex_counterfactuals_overall <- tidy_counterfactuals_overall(counter_post_overall_age_sex) %>% mutate(model = "Age-sex")
bind_rows(
bind_rows(pulmonary_counterfactuals, pulmonary_counterfactuals_overall) %>% mutate(model = "Pulmonary TB", sex=NA, age=NA),
bind_rows(extrapulmonary_counterfactuals, extrapulmonary_counterfactuals_overall) %>% mutate(model = "Extra-pulmonary TB", sex=NA, age=NA),
age_sex_counterfactuals_overall) %>%
select(model, year, age, sex, diff_inc, rr_inc, cases_averted, pct_change)
#experimental below here #############
What about a multilevel model with Wards nested within divisions?
mdata4 <- ward_inc %>%
filter(tb_type=="Pulmonary") %>%
mutate(acf_period = case_when(year %in% c(1950:1956) ~ "a. pre-acf",
year %in% c(1957) ~ "b. acf",
year %in% c(1958:1963) ~ "c. post-acf")) %>%
group_by(ward) %>%
mutate(y_num = row_number()) %>%
ungroup()
winform_prior4 <- c(prior(normal(0, 1), class = Intercept),
#prior(gamma(1, 0.01), class = shape),
prior(normal(0, 1), class = b),
prior(cauchy(0,5), class="sd"),
prior(lkj(2), class="cor"))
m_pulmonary_nested <- brm(
cases ~ y_num + acf_period + acf_period:y_num + (1 + y_num + acf_period + acf_period:y_num | division/ward),
data = mdata4,
family = negbinomial(),
seed = 1234,
chains = 4, cores = 4,
prior = winform_prior4,
save_pars = save_pars(all = TRUE),
backend = "cmdstanr",
warmup = 1000)
summary(m_pulmonary_nested)
conditional_effects(m_pulmonary_nested)